Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Basic working implementation
  • Loading branch information
Nick Eubank committed Oct 29, 2015
commit 9a4a6c4d1e7b59a44d6232f122d7dfb5baca76bb
9 changes: 0 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2244,7 +2244,6 @@ def __setitem__(self, key, value):
self._set_item(key, value)

def _setitem_slice(self, key, value):
self._check_setitem_copy()
self.ix._setitem_with_indexer(key, value)

def _setitem_array(self, key, value):
Expand All @@ -2255,7 +2254,6 @@ def _setitem_array(self, key, value):
(len(key), len(self.index)))
key = check_bool_indexer(self.index, key)
indexer = key.nonzero()[0]
self._check_setitem_copy()
self.ix._setitem_with_indexer(indexer, value)
else:
if isinstance(value, DataFrame):
Expand All @@ -2265,7 +2263,6 @@ def _setitem_array(self, key, value):
self[k1] = value[k2]
else:
indexer = self.ix._convert_to_indexer(key, axis=1)
self._check_setitem_copy()
self.ix._setitem_with_indexer((slice(None), indexer), value)

def _setitem_frame(self, key, value):
Expand All @@ -2275,7 +2272,6 @@ def _setitem_frame(self, key, value):
raise TypeError('Must pass DataFrame with boolean values only')

self._check_inplace_setting(value)
self._check_setitem_copy()
self.where(-key, value, inplace=True)

def _ensure_valid_index(self, value):
Expand Down Expand Up @@ -2311,11 +2307,6 @@ def _set_item(self, key, value):
value = self._sanitize_column(key, value)
NDFrame._set_item(self, key, value)

# check if we are modifying a copy
# try to set first as we want an invalid
# value exeption to occur first
if len(self):
self._check_setitem_copy()

def insert(self, loc, column, value, allow_duplicates=False):
"""
Expand Down
123 changes: 24 additions & 99 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class NDFrame(PandasObject):
_internal_names = ['_data', '_cacher', '_item_cache', '_cache',
'is_copy', '_subtyp', '_index',
'_default_kind', '_default_fill_value', '_metadata',
'__array_struct__', '__array_interface__']
'__array_struct__', '__array_interface__', '_children']
_internal_names_set = set(_internal_names)
_accessors = frozenset([])
_metadata = []
Expand All @@ -105,6 +105,8 @@ def __init__(self, data, axes=None, copy=False, dtype=None,
object.__setattr__(self, 'is_copy', None)
object.__setattr__(self, '_data', data)
object.__setattr__(self, '_item_cache', {})
object.__setattr__(self, '_children', [])


def _validate_dtype(self, dtype):
""" validate the passed dtype """
Expand Down Expand Up @@ -1174,9 +1176,6 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
except:
pass

if verify_is_copy:
self._check_setitem_copy(stacklevel=5, t='referant')

if clear:
self._clear_item_cache()

Expand All @@ -1201,9 +1200,16 @@ def _slice(self, slobj, axis=0, kind=None):
# but only in a single-dtyped view slicable case
is_copy = axis!=0 or result._is_view
result._set_is_copy(self, copy=is_copy)

self._children.append(weakref.ref(result))

return result

def _set_item(self, key, value):

# If children are views, reset to copies before setting.
self._convert_views_to_copies()

self._data.set(key, value)
self._clear_item_cache()

Expand All @@ -1216,103 +1222,21 @@ def _set_is_copy(self, ref=None, copy=True):
else:
self.is_copy = None

def _check_is_chained_assignment_possible(self):
"""
check if we are a view, have a cacher, and are of mixed type
if so, then force a setitem_copy check
def _convert_views_to_copies(self):
# Don't set on views.
if self._is_view:
self._data = self._data.copy()

should be called just near setting a value
# Before setting values, make sure children converted to copies.
for child in self._children:

will return a boolean if it we are a view and are cached, but a single-dtype
meaning that the cacher should be updated following setting
"""
if self._is_view and self._is_cached:
ref = self._get_cacher()
if ref is not None and ref._is_mixed_type:
self._check_setitem_copy(stacklevel=4, t='referant', force=True)
return True
elif self.is_copy:
self._check_setitem_copy(stacklevel=4, t='referant')
return False

def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
"""

Parameters
----------
stacklevel : integer, default 4
the level to show of the stack when the error is output
t : string, the type of setting error
force : boolean, default False
if True, then force showing an error

validate if we are doing a settitem on a chained copy.

If you call this function, be sure to set the stacklevel such that the
user will see the error *at the level of setting*

It is technically possible to figure out that we are setting on
a copy even WITH a multi-dtyped pandas object. In other words, some blocks
may be views while other are not. Currently _is_view will ALWAYS return False
for multi-blocks to avoid having to handle this case.

df = DataFrame(np.arange(0,9), columns=['count'])
df['group'] = 'b'

# this technically need not raise SettingWithCopy if both are view (which is not
# generally guaranteed but is usually True
# however, this is in general not a good practice and we recommend using .loc
df.iloc[0:5]['group'] = 'a'

"""

if force or self.is_copy:

value = config.get_option('mode.chained_assignment')
if value is None:
return

# see if the copy is not actually refererd; if so, then disolve
# the copy weakref
try:
gc.collect(2)
if not gc.get_referents(self.is_copy()):
self.is_copy = None
return
except:
pass

# we might be a false positive
try:
if self.is_copy().shape == self.shape:
self.is_copy = None
return
except:
pass

# a custom message
if isinstance(self.is_copy, string_types):
t = self.is_copy

elif t == 'referant':
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame\n\n"
"See the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")

else:
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame.\n"
"Try using .loc[row_indexer,col_indexer] = value instead\n\n"
"See the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")

if value == 'raise':
raise SettingWithCopyError(t)
elif value == 'warn':
warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel)
# Make sure children of children converted.
child()._convert_views_to_copies()

if child()._is_view:
child()._data = child()._data.copy()

self._children=[]

def __delitem__(self, key):
"""
Expand Down Expand Up @@ -2252,6 +2176,7 @@ def __setattr__(self, name, value):
# e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
# the same attribute.


try:
object.__getattribute__(self, name)
return object.__setattr__(self, name, value)
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ def _get_setitem_indexer(self, key):
raise IndexingError(key)

def __setitem__(self, key, value):
# Make sure changes don't propagate to children
self.obj._convert_views_to_copies()

indexer = self._get_setitem_indexer(key)
self._setitem_with_indexer(indexer, value)

Expand Down Expand Up @@ -199,6 +202,7 @@ def _has_valid_positional_setitem_indexer(self, indexer):
def _setitem_with_indexer(self, indexer, value):
self._has_valid_setitem_indexer(indexer)


# also has the side effect of consolidating in-place
from pandas import Panel, DataFrame, Series
info_axis = self.obj._info_axis_number
Expand Down Expand Up @@ -508,8 +512,6 @@ def can_do_equal_len():
if isinstance(value, ABCPanel):
value = self._align_panel(indexer, value)

# check for chained assignment
self.obj._check_is_chained_assignment_possible()

# actually do the set
self.obj._consolidate_inplace()
Expand Down
3 changes: 0 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,10 +714,7 @@ def setitem(key, value):
self._set_with(key, value)

# do the setitem
cacher_needs_updating = self._check_is_chained_assignment_possible()
setitem(key, value)
if cacher_needs_updating:
self._maybe_update_cacher()

def _set_with_engine(self, key, value):
values = self._values
Expand Down
79 changes: 79 additions & 0 deletions pandas/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,85 @@ def test_set_attribute(self):
assert_equal(df.y, 5)
assert_series_equal(df['y'], Series([2, 4, 6], name='y'))

def test_copy_on_write(self):

#######
# FORWARD PROPAGATION TESTS
#######

##
# Test children recorded from various slicing methods
##

df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
self.assertTrue(len(df._children)==0)


views = dict()

views['loc'] = df.loc[0:0,]
views['iloc'] = df.iloc[0:1,]
views['ix'] = df.ix[0:0,]
views['loc_of_loc'] = views['loc'].loc[0:0,]

copies = dict()
for v in views.keys():
self.assertTrue(views[v]._is_view)
copies[v] = views[v].copy()



df.loc[0,'col1'] = -88

for v in views.keys():
tm.assert_frame_equal(views[v], copies[v])
self.assertFalse(views[v]._is_view)

##
# Test views become copies
# during different forms of value setting.
##

parent = dict()
views = dict()
copies = dict()
for v in ['loc', 'iloc', 'ix', 'column', 'attribute']:
parent[v] = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
views[v] = parent[v].loc[0:0,]
copies[v] = views[v].copy()
self.assertTrue( views[v]._is_view )

parent['loc'].loc[0, 'col1'] = -88
parent['iloc'].iloc[0, 0] = -88
parent['ix'].ix[0, 'col1'] = -88
parent['column']['col1'] = -88
parent['attribute'].col1 = -88


for v in views.keys():
tm.assert_frame_equal(views[v], copies[v])
self.assertFalse(views[v]._is_view)



########
# No Backward Propogation
#######
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
df_copy = df.copy()

views = dict()

views['loc'] = df.loc[0:0,]
views['iloc'] = df.iloc[0:1,]
views['ix'] = df.ix[0:0,]
views['loc_of_loc'] = views['loc'].loc[0:0,]

for v in views.keys():
views[v].loc[0:0,] = -99

tm.assert_frame_equal(df, df_copy)


class TestPanel(tm.TestCase, Generic):
_typ = Panel
Expand Down