-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
Subclassing improvements #12308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Subclassing improvements #12308
Changes from all commits
e53ab65 5afd6a2 a41cd31 87d5e86 f515b27 d7310be 910c17e c784311 7896d55 04aed78 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -168,6 +168,9 @@ def _use_inf_as_null(key): | |
| | ||
| def _isnull_ndarraylike(obj): | ||
| | ||
| if isinstance(obj, pd.SparseSeries): | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you adding this? | ||
| obj = pd.SparseArray(obj) | ||
| | ||
| values = getattr(obj, 'values', obj) | ||
| dtype = values.dtype | ||
| | ||
| | @@ -197,8 +200,8 @@ def _isnull_ndarraylike(obj): | |
| | ||
| # box | ||
| if isinstance(obj, gt.ABCSeries): | ||
| from pandas import Series | ||
| result = Series(result, index=obj.index, name=obj.name, copy=False) | ||
| result = obj._constructor( | ||
| result, index=obj.index, name=obj.name, copy=False) | ||
| | ||
| return result | ||
| | ||
| | @@ -226,8 +229,8 @@ def _isnull_ndarraylike_old(obj): | |
| | ||
| # box | ||
| if isinstance(obj, gt.ABCSeries): | ||
| from pandas import Series | ||
| result = Series(result, index=obj.index, name=obj.name, copy=False) | ||
| result = obj._constructor( | ||
| result, index=obj.index, name=obj.name, copy=False) | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these are changes which I would accept, e.g. being more generic. really really try hard not to do if/thens. | ||
| | ||
| return result | ||
| | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -1773,7 +1773,6 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): | |
| result = self._constructor_sliced(new_values, index=self.columns, | ||
| name=self.index[loc], copy=copy, | ||
| dtype=new_values.dtype) | ||
| | ||
| else: | ||
| result = self.iloc[loc] | ||
| result.index = new_index | ||
| | @@ -4898,7 +4897,12 @@ def describe_numeric_1d(series): | |
| formatted_percentiles + ['max']) | ||
| d = ([series.count(), series.mean(), series.std(), series.min()] + | ||
| [series.quantile(x) for x in percentiles] + [series.max()]) | ||
| return pd.Series(d, index=stat_index, name=series.name) | ||
| if isinstance(self, ABCSeries): | ||
| return self._constructor( | ||
| d, index=stat_index, name=series.name) | ||
| else: | ||
| return self._constructor_sliced( | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this doesn't make much sense.you would need more axes here and/or different meta-data. why are you doing this? | ||
| d, index=stat_index, name=series.name) | ||
| | ||
| def describe_categorical_1d(data): | ||
| names = ['count', 'unique'] | ||
| | @@ -4918,7 +4922,12 @@ def describe_categorical_1d(data): | |
| names += ['top', 'freq'] | ||
| result += [top, freq] | ||
| | ||
| return pd.Series(result, index=names, name=data.name) | ||
| if isinstance(self, ABCSeries): | ||
| return self._constructor( | ||
| result, index=names, name=data.name) | ||
| else: | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here (e.g. I can buy | ||
| return self._constructor_sliced( | ||
| result, index=names, name=data.name) | ||
| | ||
| def describe_1d(data): | ||
| if com.is_bool_dtype(data): | ||
| | @@ -4957,7 +4966,7 @@ def describe_1d(data): | |
| | ||
| d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1) | ||
| d.columns = data.columns.copy() | ||
| return d | ||
| return self._constructor(d) | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this results in additional construction overhead. Better to add some logic to Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I had already added the logic to concat, so the constructor was unnecessary here. I have removed it. | ||
| | ||
| def _check_percentile(self, q): | ||
| """Validate percentiles (used by describe and quantile).""" | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -36,8 +36,11 @@ | |
| is_categorical_dtype, _values_from_object, | ||
| is_datetime_or_timedelta_dtype, is_bool, | ||
| is_bool_dtype, AbstractMethodError, | ||
| _maybe_fill) | ||
| _maybe_fill, ABCSeries) | ||
| from pandas.core.config import option_context, is_callable | ||
| | ||
| from pandas.sparse.frame import SparseDataFrame | ||
| | ||
| import pandas.lib as lib | ||
| from pandas.lib import Timestamp | ||
| import pandas.tslib as tslib | ||
| | @@ -340,6 +343,16 @@ def __init__(self, obj, keys=None, axis=0, level=None, | |
| if axis != 0: | ||
| raise ValueError('as_index=False only valid for axis=0') | ||
| | ||
| self.inputconstructor = obj._constructor | ||
| if isinstance(obj, ABCSeries): | ||
| self.inputconstructor_sliced = obj._constructor | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is crazy? what is going on here? | ||
| else: | ||
| self.inputconstructor_sliced = obj._constructor_sliced | ||
| if isinstance(obj, Panel): | ||
| self.inputconstructor_expanddim = obj._constructor | ||
| else: | ||
| self.inputconstructor_expanddim = obj._constructor_expanddim | ||
| self.lenshape = obj.ndim | ||
| self.as_index = as_index | ||
| self.keys = keys | ||
| self.sort = sort | ||
| | @@ -393,6 +406,38 @@ def indices(self): | |
| self._assure_grouper() | ||
| return self.grouper.indices | ||
| | ||
| @property | ||
| ||
| def inputconstructor(self): | ||
| return self._inputconstructor | ||
| | ||
| @inputconstructor.setter | ||
| def inputconstructor(self, constructor): | ||
| self._inputconstructor = constructor | ||
| | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this naming schema is not good. something more along the lines of | ||
| @property | ||
| def inputconstructor_sliced(self): | ||
| return self._inputconstructor_sliced | ||
| | ||
| @inputconstructor_sliced.setter | ||
| def inputconstructor_sliced(self, constructor): | ||
| self._inputconstructor_sliced = constructor | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you saving this? | ||
| | ||
| @property | ||
| def inputconstructor_expanddim(self): | ||
| return self._inputconstructor_expanddim | ||
| | ||
| @inputconstructor_expanddim.setter | ||
| def inputconstructor_expanddim(self, constructor): | ||
| self._inputconstructor_expanddim = constructor | ||
| | ||
| @property | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what the heck is this? | ||
| def lenshape(self): | ||
| return self._lenshape | ||
| | ||
| @lenshape.setter | ||
| def lenshape(self, lenshape_in): | ||
| self._lenshape = lenshape_in | ||
| | ||
| def _get_indices(self, names): | ||
| """ | ||
| safe get multiple indices, translate keys for | ||
| | @@ -1328,7 +1373,7 @@ def cumcount(self, ascending=True): | |
| | ||
| index = self._selected_obj.index | ||
| cumcounts = self._cumcount_array(ascending=ascending) | ||
| return Series(cumcounts, index) | ||
| return self.inputconstructor_sliced(cumcounts, index) | ||
| | ||
| @Substitution(name='groupby') | ||
| @Appender(_doc_template) | ||
| | @@ -2603,7 +2648,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): | |
| result = self._aggregate_named(func_or_funcs, *args, **kwargs) | ||
| | ||
| index = Index(sorted(result), name=self.grouper.names[0]) | ||
| ret = Series(result, index=index) | ||
| ret = self.inputconstructor(result, index=index) | ||
| | ||
| if not self.as_index: # pragma: no cover | ||
| print('Warning, ignoring as_index=True') | ||
| | @@ -2659,19 +2704,25 @@ def _aggregate_multiple_funcs(self, arg, _level): | |
| if _level: | ||
| return results | ||
| return list(compat.itervalues(results))[0] | ||
| return DataFrame(results, columns=columns) | ||
| | ||
| if self.lenshape == 2: | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how is the ever not 2d? | ||
| return self.inputconstructor(results, columns=columns) | ||
| elif self.lenshape == 1: | ||
| return self.inputconstructor_expanddim(results, columns=columns) | ||
| | ||
| def _wrap_output(self, output, index, names=None): | ||
| """ common agg/transform wrapping logic """ | ||
| output = output[self.name] | ||
| | ||
| if names is not None: | ||
| return DataFrame(output, index=index, columns=names) | ||
| return self.inputconstructor_expanddim( | ||
| output, index=index, columns=names) | ||
| else: | ||
| name = self.name | ||
| if name is None: | ||
| name = self._selected_obj.name | ||
| return Series(output, index=index, name=name) | ||
| return self.inputconstructor( | ||
| output, index=index, name=name) | ||
| | ||
| def _wrap_aggregated_output(self, output, names=None): | ||
| return self._wrap_output(output=output, | ||
| | @@ -2873,6 +2924,7 @@ def nunique(self, dropna=True): | |
| inc[idx] = 1 | ||
| | ||
| out = np.add.reduceat(inc, idx).astype('int64', copy=False) | ||
| | ||
| res = out if ids[0] != -1 else out[1:] | ||
| ri = self.grouper.result_index | ||
| | ||
| | @@ -2881,9 +2933,7 @@ def nunique(self, dropna=True): | |
| res, out = np.zeros(len(ri), dtype=out.dtype), res | ||
| res[ids] = out | ||
| | ||
| return Series(res, | ||
| index=ri, | ||
| name=self.name) | ||
| return self.inputconstructor_sliced(res, index=ri, name=self.name) | ||
| | ||
| @deprecate_kwarg('take_last', 'keep', | ||
| mapping={True: 'last', False: 'first'}) | ||
| | @@ -3024,10 +3074,9 @@ def count(self): | |
| ids = com._ensure_platform_int(ids) | ||
| out = np.bincount(ids[mask], minlength=ngroups or None) | ||
| | ||
| return Series(out, | ||
| index=self.grouper.result_index, | ||
| name=self.name, | ||
| dtype='int64') | ||
| return self.inputconstructor_sliced( | ||
| out, index=self.grouper.result_index, | ||
| name=self.name, dtype='int64') | ||
| | ||
| def _apply_to_column_groupbys(self, func): | ||
| """ return a pass thru """ | ||
| | @@ -3141,7 +3190,11 @@ def aggregate(self, arg, *args, **kwargs): | |
| result.columns.levels[0], | ||
| name=self._selected_obj.columns.name) | ||
| except: | ||
| result = self._aggregate_generic(arg, *args, **kwargs) | ||
| if self.lenshape == 2: | ||
| result = self.inputconstructor( | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure what you are doing here | ||
| self._aggregate_generic(arg, *args, **kwargs)) | ||
| else: | ||
| result = self._aggregate_generic(arg, *args, **kwargs) | ||
| | ||
| if not self.as_index: | ||
| self._insert_inaxis_grouper_inplace(result) | ||
| | @@ -3719,7 +3772,12 @@ def _wrap_agged_blocks(self, items, blocks): | |
| if self.axis == 1: | ||
| result = result.T | ||
| | ||
| return self._reindex_output(result)._convert(datetime=True) | ||
| if isinstance(self.inputconstructor(), SparseDataFrame): | ||
| return DataFrame( | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again this is a completely crazy thing. doing this kind of introspection is so fraught with peril and should simply not be done here. Yes maybe 1 function to do it is fine. | ||
| self._reindex_output(result)._convert(datetime=True)) | ||
| else: | ||
| return self.inputconstructor( | ||
| self._reindex_output(result)._convert(datetime=True)) | ||
| | ||
| def _reindex_output(self, result): | ||
| """ | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -784,7 +784,13 @@ def wrapper(self, other, axis=None): | |
| # always return a full value series here | ||
| res = _values_from_object(res) | ||
| | ||
| res = pd.Series(res, index=self.index, name=self.name, dtype='bool') | ||
| from pandas.sparse.api import SparseSeries | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are changing this completely here Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you explain what you mean? The way I understand this, wrapper always return a Series even if the input is a SparseSeries. I would like wrapper to be able to return a SubclassedSeries if the input is a SubclassedSeries, but the return type would still have to be a Series if the input is a SparseSeries. | ||
| if isinstance(self, SparseSeries): | ||
| res = pd.Series( | ||
| res, index=self.index, name=self.name, dtype='bool') | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment | ||
| else: | ||
| res = self._constructor( | ||
| res, index=self.index, name=self.name, dtype='bool') | ||
| return res | ||
| | ||
| return wrapper | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -63,7 +63,7 @@ class _Unstacker(object): | |
| """ | ||
| | ||
| def __init__(self, values, index, level=-1, value_columns=None, | ||
| fill_value=None): | ||
| fill_value=None, return_type=DataFrame): | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ??? | ||
| | ||
| self.is_categorical = None | ||
| if values.ndim == 1: | ||
| | @@ -74,6 +74,7 @@ def __init__(self, values, index, level=-1, value_columns=None, | |
| self.values = values | ||
| self.value_columns = value_columns | ||
| self.fill_value = fill_value | ||
| self.return_type = return_type | ||
| | ||
| if value_columns is None and values.shape[1] != 1: # pragma: no cover | ||
| raise ValueError('must pass column labels for multi-column data') | ||
| | @@ -169,7 +170,7 @@ def get_result(self): | |
| ordered=ordered) | ||
| for i in range(values.shape[-1])] | ||
| | ||
| return DataFrame(values, index=index, columns=columns) | ||
| return self.return_type(values, index=index, columns=columns) | ||
| | ||
| def get_new_values(self): | ||
| values = self.values | ||
| | @@ -330,8 +331,9 @@ def pivot(self, index=None, columns=None, values=None): | |
| index = self.index | ||
| else: | ||
| index = self[index] | ||
| indexed = Series(self[values].values, | ||
| index=MultiIndex.from_arrays([index, self[columns]])) | ||
| indexed = self._constructor_sliced( | ||
| self[values].values, | ||
| index=MultiIndex.from_arrays([index, self[columns]])) | ||
| return indexed.unstack(columns) | ||
| | ||
| | ||
| | @@ -407,7 +409,8 @@ def unstack(obj, level, fill_value=None): | |
| return obj.T.stack(dropna=False) | ||
| else: | ||
| unstacker = _Unstacker(obj.values, obj.index, level=level, | ||
| fill_value=fill_value) | ||
| fill_value=fill_value, | ||
| return_type=obj._constructor_expanddim) | ||
| return unstacker.get_result() | ||
| | ||
| | ||
| | @@ -439,8 +442,8 @@ def _unstack_frame(obj, level, fill_value=None): | |
| newb = make_block(new_values.T, placement=new_placement) | ||
| new_blocks.append(newb) | ||
| | ||
| result = DataFrame(BlockManager(new_blocks, new_axes)) | ||
| mask_frame = DataFrame(BlockManager(mask_blocks, new_axes)) | ||
| result = obj._constructor(BlockManager(new_blocks, new_axes)) | ||
| mask_frame = obj._constructor(BlockManager(mask_blocks, new_axes)) | ||
| return result.ix[:, mask_frame.sum(0) > 0] | ||
| else: | ||
| unstacker = _Unstacker(obj.values, obj.index, level=level, | ||
| | @@ -509,7 +512,7 @@ def factorize(index): | |
| mask = notnull(new_values) | ||
| new_values = new_values[mask] | ||
| new_index = new_index[mask] | ||
| return Series(new_values, index=new_index) | ||
| return frame._constructor_sliced(new_values, index=new_index) | ||
| | ||
| | ||
| def stack_multiple(frame, level, dropna=True): | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use
gt.ABCIndexClass(its already imported)