2626 _default_index , _is_sequence )
2727from pandas .core .generic import NDFrame
2828from pandas .core .index import Index , MultiIndex , _ensure_index
29- from pandas .core .indexing import _NDFrameIndexer , _maybe_droplevels
29+ from pandas .core .indexing import (_NDFrameIndexer , _maybe_droplevels ,
30+ _is_index_slice , _check_bool_indexer )
3031from pandas .core .internals import BlockManager , make_block , form_blocks
3132from pandas .core .series import Series , _radd_compat , _dtype_from_scalar
3233from pandas .compat .scipy import scoreatpercentile as _quantile
@@ -313,7 +314,7 @@ def f(self, other):
313314 return self ._combine_series_infer (other , func )
314315 else :
315316
316- # straight boolean comparisions we want to allow all columns
317+ # straight boolean comparisions we want to allow all columns
317318 # (regardless of dtype to pass thru)
318319 return self ._combine_const (other , func , raise_on_error = False ).fillna (True ).astype (bool )
319320
@@ -1972,72 +1973,52 @@ def iget_value(self, i, j):
19721973 return self .get_value (row , col )
19731974
def __getitem__(self, key):
    """
    Dispatch ``frame[key]`` according to the type of ``key``.

    * ``slice`` -> row slicing, handled by ``_getitem_slice``
    * ``np.ndarray`` / ``list`` -> boolean or fancy indexing, handled by
      ``_getitem_array``
    * ``DataFrame`` -> boolean frame selection, handled by
      ``_getitem_frame``
    * anything else -> column lookup; ``_getitem_multilevel`` when the
      columns are a ``MultiIndex``, otherwise ``_get_item_cache``

    The DataFrame check deliberately comes before the MultiIndex check, so
    a DataFrame key is never handed to ``_getitem_multilevel``.
    """
    if isinstance(key, slice):
        # slice rows
        return self._getitem_slice(key)

    if isinstance(key, (np.ndarray, list)):
        # either boolean or fancy integer index
        return self._getitem_array(key)

    if isinstance(key, DataFrame):
        return self._getitem_frame(key)

    if isinstance(self.columns, MultiIndex):
        return self._getitem_multilevel(key)

    # get column
    return self._get_item_cache(key)
20051989
def _getitem_slice(self, key):
    """
    Return the rows selected by the slice ``key``.

    The slice is passed through unchanged when the index's inferred type
    is ``'integer'`` or when ``_is_index_slice(key)`` is true. For a
    ``'floating'`` index, and in every remaining case, it is first
    translated with ``self.ix._convert_to_indexer(key, axis=0)``.
    """
    idx_type = self.index.inferred_type
    # 'floating' is tested first so that a float index is always converted,
    # even if the slice bounds would pass _is_index_slice.
    if idx_type != 'floating' and (idx_type == 'integer' or
                                   _is_index_slice(key)):
        indexer = key
    else:
        indexer = self.ix._convert_to_indexer(key, axis=0)
    return self._slice(indexer, axis=0)
1999+
def _getitem_array(self, key):
    """
    Index with an ndarray / list key.

    A boolean key (as decided by ``com._is_bool_indexer``) selects rows;
    any other key is converted to a column indexer with
    ``self.ix._convert_to_indexer(key, axis=1)`` and selects columns.

    Raises
    ------
    ValueError
        If a boolean key is not a Series whose index differs from
        ``self.index`` and its length differs from ``len(self.index)``.
    """
    # also raises Exception if object array with NA values
    if com._is_bool_indexer(key):
        # warning here just in case -- previously __setitem__ was
        # reindexing but __getitem__ was not; it seems more reasonable to
        # go with the __setitem__ behavior since that is more consistent
        # with all other indexing behavior
        if isinstance(key, Series) and not key.index.equals(self.index):
            import warnings
            warnings.warn("Boolean Series key will be reindexed to match "
                          "DataFrame index.", UserWarning)
        elif len(key) != len(self.index):
            raise ValueError('Item wrong length %d instead of %d!' %
                             (len(key), len(self.index)))
        # _check_bool_indexer will throw exception if Series key cannot
        # be reindexed to match DataFrame rows
        key = _check_bool_indexer(self.index, key)
        indexer = key.nonzero()[0]
        return self.take(indexer, axis=0)
    else:
        indexer = self.ix._convert_to_indexer(key, axis=1)
        return self.take(indexer, axis=1)
20412022
20422023 def _getitem_multilevel (self , key ):
20432024 loc = self .columns .get_loc (key )
@@ -2063,6 +2044,20 @@ def _getitem_multilevel(self, key):
20632044 else :
20642045 return self ._get_item_cache (key )
20652046
def _getitem_frame(self, key):
    """
    Select with a boolean DataFrame key, e.g. ``df[df > 0]``.

    Returns ``self.where(key)``.

    Raises
    ------
    ValueError
        If ``key.values`` does not have boolean dtype.
    """
    if key.values.dtype != np.bool_:
        raise ValueError('Must pass DataFrame with boolean values only')
    return self.where(key)
2051+
def _slice(self, slobj, axis=0):
    """
    Slice the underlying data with ``slobj`` along ``axis`` and wrap the
    result with ``self._constructor``.

    The axis number handed to ``self._data.get_slice`` is the opposite of
    the frame's: frame axis 0 is passed as 1, any other value as 0.
    """
    mgr_axis = 1 if axis == 0 else 0
    new_data = self._data.get_slice(slobj, axis=mgr_axis)
    return self._constructor(new_data)
2060+
20662061 def _box_item_values (self , key , values ):
20672062 items = self .columns [self .columns .get_loc (key )]
20682063 if values .ndim == 2 :
@@ -2096,34 +2091,56 @@ def __setattr__(self, name, value):
20962091 object .__setattr__ (self , name , value )
20972092
def __setitem__(self, key, value):
    """
    Dispatch ``frame[key] = value`` according to the type of ``key``.

    * ``slice`` -> row assignment, handled by ``_setitem_slice``
    * ``np.ndarray`` / ``list`` -> handled by ``_setitem_array``
    * ``DataFrame`` -> boolean frame assignment, handled by
      ``_setitem_frame``
    * anything else -> single column assignment via ``_set_item``
    """
    if isinstance(key, slice):
        # slice rows
        self._setitem_slice(key, value)
    elif isinstance(key, (np.ndarray, list)):
        self._setitem_array(key, value)
    elif isinstance(key, DataFrame):
        self._setitem_frame(key, value)
    else:
        # set column
        self._set_item(key, value)
21082104
2109- def _boolean_set (self , key , value ):
2110- if key .values .dtype != np .bool_ :
2111- raise ValueError ('Must pass DataFrame with boolean values only' )
2112- self .where (- key , value , inplace = True )
def _setitem_slice(self, key, value):
    """
    Assign ``value`` to the rows selected by the slice ``key``.

    The slice is resolved to an indexer by the same rule as in
    ``_getitem_slice``: used unchanged for an ``'integer'`` index or when
    ``_is_index_slice(key)`` is true, otherwise converted with
    ``self.ix._convert_to_indexer(key, axis=0)``. The assignment itself is
    done by ``self.ix._setitem_with_indexer``.
    """
    idx_type = self.index.inferred_type
    # 'floating' is tested first so that a float index is always converted,
    # even if the slice bounds would pass _is_index_slice.
    if idx_type != 'floating' and (idx_type == 'integer' or
                                   _is_index_slice(key)):
        indexer = key
    else:
        indexer = self.ix._convert_to_indexer(key, axis=0)
    self.ix._setitem_with_indexer(indexer, value)
21132114
2114- def _set_item_multiple (self , keys , value ):
2115- if isinstance (value , DataFrame ):
2116- if len (value .columns ) != len (keys ):
2117- raise AssertionError ('Columns must be same length as keys' )
2118- for k1 , k2 in zip (keys , value .columns ):
2119- self [k1 ] = value [k2 ]
def _setitem_array(self, key, value):
    """
    Assign ``value`` using an ndarray / list key.

    * Boolean key (as decided by ``com._is_bool_indexer``): the rows where
      the key is true are set through ``self.ix._setitem_with_indexer``.
    * Any other key with a DataFrame ``value``: ``key[i]`` is paired with
      ``value.columns[i]`` and each column is assigned individually.
    * Any other key with any other ``value``: the key is converted to a
      column indexer and all rows of those columns are set.

    Raises
    ------
    ValueError
        If a boolean key's length differs from ``len(self.index)``.
    AssertionError
        If ``value`` is a DataFrame whose number of columns differs from
        ``len(key)``.
    """
    # also raises Exception if object array with NA values
    if com._is_bool_indexer(key):
        if len(key) != len(self.index):
            raise ValueError('Item wrong length %d instead of %d!' %
                             (len(key), len(self.index)))
        key = _check_bool_indexer(self.index, key)
        indexer = key.nonzero()[0]
        self.ix._setitem_with_indexer(indexer, value)
    elif isinstance(value, DataFrame):
        if len(value.columns) != len(key):
            raise AssertionError('Columns must be same length as key')
        for k1, k2 in zip(key, value.columns):
            self[k1] = value[k2]
    else:
        indexer = self.ix._convert_to_indexer(key, axis=1)
        # slice(None) selects every row; only the columns are restricted
        self.ix._setitem_with_indexer((slice(None), indexer), value)
2133+
def _setitem_frame(self, key, value):
    """
    Boolean setting with a DataFrame key, e.g. ``df[df > df2] = 0``.

    Delegates to ``self.where(-key, value, inplace=True)``.

    Raises
    ------
    ValueError
        If ``key.values`` does not have boolean dtype, or if
        ``self._is_mixed_type`` is true.
    """
    if key.values.dtype != np.bool_:
        raise ValueError('Must pass DataFrame with boolean values only')

    if self._is_mixed_type:
        raise ValueError('Cannot do boolean setting on mixed-type frame')

    self.where(-key, value, inplace=True)
21272144
21282145 def _set_item (self , key , value ):
21292146 """
@@ -2918,7 +2935,7 @@ def take(self, indices, axis=0):
29182935 """
29192936 if isinstance (indices , list ):
29202937 indices = np .array (indices )
2921- if self ._data . is_mixed_dtype () :
2938+ if self ._is_mixed_type :
29222939 if axis == 0 :
29232940 new_data = self ._data .take (indices , axis = 1 )
29242941 return DataFrame (new_data )
@@ -3247,7 +3264,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
32473264
32483265 new_axis , indexer = the_axis .sortlevel (level , ascending = ascending )
32493266
3250- if self ._data . is_mixed_dtype () and not inplace :
3267+ if self ._is_mixed_type and not inplace :
32513268 if axis == 0 :
32523269 return self .reindex (index = new_axis )
32533270 else :
@@ -3472,7 +3489,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
34723489 'in length. Expecting %d got %d ' %
34733490 (len (to_replace ), len (value )))
34743491
3475- new_data = self ._data .replace_list (to_replace , value ,
3492+ new_data = self ._data .replace_list (to_replace , value ,
34763493 inplace = inplace )
34773494
34783495 else : # [NA, ''] -> 0
@@ -5055,7 +5072,7 @@ def clip(self, lower=None, upper=None):
50555072 # GH 2747 (arguments were reversed)
50565073 if lower is not None and upper is not None :
50575074 lower , upper = min (lower ,upper ), max (lower ,upper )
5058-
5075+
50595076 return self .apply (lambda x : x .clip (lower = lower , upper = upper ))
50605077
50615078 def clip_upper (self , threshold ):
@@ -5246,25 +5263,22 @@ def where(self, cond, other=NA, inplace=False, try_cast=False, raise_on_error=Tr
52465263 -------
52475264 wh : DataFrame
52485265 """
5249- if not hasattr (cond , 'shape' ):
5250- raise ValueError ('where requires an ndarray like object for its '
5251- 'condition' )
5252-
5253- if isinstance (cond , np .ndarray ):
5266+ if isinstance (cond , DataFrame ):
5267+ # this already checks for index/column equality
5268+ cond = cond .reindex (self .index , columns = self .columns )
5269+ else :
5270+ if not hasattr (cond , 'shape' ):
5271+ raise ValueError ('where requires an ndarray like object for its '
5272+ 'condition' )
52545273 if cond .shape != self .shape :
52555274 raise ValueError ('Array conditional must be same shape as self' )
52565275 cond = self ._constructor (cond , index = self .index ,
52575276 columns = self .columns )
52585277
5259- if cond .shape != self .shape :
5260- cond = cond .reindex (self .index , columns = self .columns )
5261-
5262- if inplace :
5263- cond = - (cond .fillna (True ).astype (bool ))
5264- else :
5265- cond = cond .fillna (False ).astype (bool )
5266- elif inplace :
5267- cond = - cond
5278+ if inplace :
5279+ cond = - (cond .fillna (True ).astype (bool ))
5280+ else :
5281+ cond = cond .fillna (False ).astype (bool )
52685282
52695283 if isinstance (other , DataFrame ):
52705284 _ , other = self .align (other , join = 'left' , fill_value = NA )
0 commit comments