Skip to content
Prev Previous commit
Next Next commit
make _is_view more conservative on multi-block objects
tweak so addition of new columnsdoesn't cause copying improved tests, removed copy option from merge
  • Loading branch information
Nick Eubank committed Jan 13, 2016
commit a5f64fc75f907f2c96e2a12e5581966d7da5958b
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4370,12 +4370,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
@Appender(_merge_doc, indents=2)
def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
left_index=False, right_index=False, sort=False,
suffixes=('_x', '_y'), copy=True, indicator=False):
suffixes=('_x', '_y'), indicator=False):
from pandas.tools.merge import merge
return merge(self, right, how=how, on=on,
left_on=left_on, right_on=right_on,
left_index=left_index, right_index=right_index, sort=sort,
suffixes=suffixes, copy=copy, indicator=indicator)
suffixes=suffixes, indicator=indicator)

def round(self, decimals=0, out=None):
"""
Expand Down
16 changes: 10 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,9 +1091,6 @@ def get(self, key, default=None):
def __getitem__(self, item):
result = self._get_item_cache(item)

if isinstance(item, str):
result._is_column_view = True

return result

def _get_item_cache(self, item):
Expand Down Expand Up @@ -1218,10 +1215,14 @@ def _slice(self, slobj, axis=0, kind=None):
return result

def _set_item(self, key, value):

# If children are views, reset to copies before setting.
self._execute_copy_on_write()

if hasattr(self, 'columns'):
if key in self.columns:
# If children are views, reset to copies before setting.
self._execute_copy_on_write()
else:
self._execute_copy_on_write()

self._data.set(key, value)
self._clear_item_cache()

Expand Down Expand Up @@ -2339,6 +2340,9 @@ def __setattr__(self, name, value):
# e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
# the same attribute.

if hasattr(self, 'columns'):
if name in self.columns:
self._execute_copy_on_write()

try:
object.__getattribute__(self, name)
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2933,10 +2933,11 @@ def is_datelike_mixed_type(self):

@property
def is_view(self):
""" return a boolean if we are a single block and are a view """
if len(self.blocks) == 1:
return self.blocks[0].is_view

""" return a boolean True if any block is a view """
for b in self.blocks:
if b.is_view: return True


# It is technically possible to figure out which blocks are views
# e.g. [ b.values.base is not None for b in self.blocks ]
# but then we have the case of possibly some blocks being a view
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,8 +1339,10 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
other = other.reindex(**{axis_name: axis_values})

for frame in axis_values:
self[frame].update(other[frame], join, overwrite, filter_func,
temp = self[frame]
temp.update(other[frame], join, overwrite, filter_func,
raise_conflict)
self[frame] = temp

def _get_join_index(self, other, how):
if how == 'left':
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,7 @@ def melt_stub(df, stub, i, j):

for stub in stubnames[1:]:
new = melt_stub(df, stub, id_vars, j)
newdf = newdf.merge(new, how="outer", on=id_vars + [j], copy=False)
newdf = newdf.merge(new, how="outer", on=id_vars + [j])
return newdf.set_index([i, j])

def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
Expand Down
39 changes: 13 additions & 26 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ def test_setitem_list(self):

assert_series_equal(self.frame['B'], data['A'], check_names=False)
assert_series_equal(self.frame['A'], data['B'], check_names=False)

with assertRaisesRegexp(ValueError, 'Columns must be same length as key'):
data[['A']] = self.frame[['A', 'B']]
with assertRaisesRegexp(ValueError, 'Length of values does not match '
Expand Down Expand Up @@ -561,14 +560,14 @@ def test_setitem(self):
self.frame['col8'] = 'foo'
assert((self.frame['col8'] == 'foo').all())

# this is partially a view (e.g. some blocks are view)
# so raise/warn
# Changes should not propageate
smaller = self.frame[:2]
def f():
smaller['col10'] = ['1', '2']
self.assertRaises(com.SettingWithCopyError, f)
self.assertEqual(smaller['col10'].dtype, np.object_)
self.assertTrue((smaller['col10'] == ['1', '2']).all())
smaller['col0'] = ['1', '2']
f()
self.assertEqual(smaller['col0'].dtype, np.object_)
self.assertTrue((smaller['col0'] == ['1', '2']).all())
self.assertNotEqual(self.frame[:2].col0.dtype, np.object_)

# with a dtype
for dtype in ['int32','int64','float32','float64']:
Expand Down Expand Up @@ -1022,13 +1021,11 @@ def test_fancy_getitem_slice_mixed(self):
sliced = self.mixed_frame.ix[:, -3:]
self.assertEqual(sliced['D'].dtype, np.float64)

# get view with single block
# setting it triggers setting with copy
# Should never act as view due to copy on write
sliced = self.frame.ix[:, -3:]
def f():
sliced['C'] = 4.
self.assertRaises(com.SettingWithCopyError, f)
self.assertTrue((self.frame['C'] == 4).all())
sliced['C'] = 4
self.assertTrue((self.frame['C'] != 4).all())

def test_fancy_setitem_int_labels(self):
# integer index defers to label-based indexing
Expand Down Expand Up @@ -1827,14 +1824,12 @@ def test_irow(self):
expected = df.ix[8:14]
assert_frame_equal(result, expected)

# verify slice is view
# setting it makes it raise/warn
# verify changes on slices never propogate
def f():
result[2] = 0.
self.assertRaises(com.SettingWithCopyError, f)
exp_col = df[2].copy()
exp_col[4:8] = 0.
assert_series_equal(df[2], exp_col)
self.assertFalse((df[2] == exp_col).all())

# list of integers
result = df.iloc[[1, 2, 4, 6]]
Expand Down Expand Up @@ -1862,12 +1857,10 @@ def test_icol(self):
expected = df.ix[:, 8:14]
assert_frame_equal(result, expected)

# verify slice is view
# and that we are setting a copy
# Verify setting on view doesn't propogate
def f():
result[8] = 0.
self.assertRaises(com.SettingWithCopyError, f)
self.assertTrue((df[8] == 0).all())
self.assertTrue((df[8] != 0).all())

# list of integers
result = df.iloc[:, [1, 2, 4, 6]]
Expand Down Expand Up @@ -4343,12 +4336,6 @@ def test_constructor_with_datetime_tz(self):
assert_series_equal(df['D'],Series(idx,name='D'))
del df['D']

# assert that A & C are not sharing the same base (e.g. they
# are copies)
b1 = df._data.blocks[1]
b2 = df._data.blocks[2]
self.assertTrue(b1.values.equals(b2.values))
self.assertFalse(id(b1.values.values.base) == id(b2.values.values.base))

# with nan
df2 = df.copy()
Expand Down
27 changes: 25 additions & 2 deletions pandas/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1740,7 +1740,6 @@ def test_copy_on_write(self):
copies[v] = views[v].copy()



df.loc[0,'col1'] = -88

for v in views.keys():
Expand Down Expand Up @@ -1807,6 +1806,23 @@ def test_copy_on_write(self):
df.loc[0, 'col1']=-88
self.assertTrue(v.loc[0] == -88)
self.assertTrue(v._is_view)

# Does NOT hold for multi-index (can't guarantee view behaviors --
# setting on multi-index creates new data somehow.)
index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
['one', 'two', 'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
frame = pd.DataFrame(np.random.randn(10, 3), index=index,
columns=pd.Index(['A', 'B', 'C'], name='exp')).T

v = frame['foo','one']
self.assertTrue(v._is_view)
self.assertFalse(v._is_column_view)
frame.loc['A', ('foo','one')]=-88
self.assertFalse(v.loc['A'] == -88)


###
# Make sure that no problems if view created on view and middle-view
Expand All @@ -1827,7 +1843,14 @@ def test_copy_on_write(self):
tm.assert_frame_equal(v2, v2_copy)



def test_is_view_of_multiblocks(self):
# Ensure that if even if only one block of DF is view,
# returns _is_view = True.
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
s = pd.Series([0.5, 0.3, 0.4])
df['col3'] = s[0:1]
self.assertTrue(df['col3']._is_view)
self.assertTrue(df._is_view)

class TestPanel(tm.TestCase, Generic):
_typ = Panel
Expand Down
Loading