Skip to content
8 changes: 7 additions & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,9 +943,15 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
counts : Series
"""
from pandas.core.algorithms import value_counts
from pandas import Index

result = value_counts(self, sort=sort, ascending=ascending,
normalize=normalize, bins=bins, dropna=dropna)
return result

if isinstance(self, Index):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use gt.ABCIndexClass (it's already imported)

return result
else:
return self._constructor(result)

def unique(self):
"""
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ def _use_inf_as_null(key):

def _isnull_ndarraylike(obj):

if isinstance(obj, pd.SparseSeries):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you adding this?

obj = pd.SparseArray(obj)

values = getattr(obj, 'values', obj)
dtype = values.dtype

Expand Down Expand Up @@ -197,8 +200,8 @@ def _isnull_ndarraylike(obj):

# box
if isinstance(obj, gt.ABCSeries):
from pandas import Series
result = Series(result, index=obj.index, name=obj.name, copy=False)
result = obj._constructor(
result, index=obj.index, name=obj.name, copy=False)

return result

Expand Down Expand Up @@ -226,8 +229,8 @@ def _isnull_ndarraylike_old(obj):

# box
if isinstance(obj, gt.ABCSeries):
from pandas import Series
result = Series(result, index=obj.index, name=obj.name, copy=False)
result = obj._constructor(
result, index=obj.index, name=obj.name, copy=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these are changes which I would accept, e.g. being more generic. really really try hard not to do if/thens.


return result

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1918,6 +1918,7 @@ def _ixs(self, i, axis=0):
# if we are a copy, mark as such
copy = (isinstance(new_values, np.ndarray) and
new_values.base is None)

result = self._constructor_sliced(new_values,
index=self.columns,
name=self.index[i],
Expand Down Expand Up @@ -4900,7 +4901,7 @@ def f(x):
if axis == 0:
result = com._coerce_to_dtypes(result, self.dtypes)

return Series(result, index=labels)
return self._constructor_sliced(result, index=labels)

def idxmin(self, axis=0, skipna=True):
"""
Expand Down
17 changes: 13 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1773,7 +1773,6 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
result = self._constructor_sliced(new_values, index=self.columns,
name=self.index[loc], copy=copy,
dtype=new_values.dtype)

else:
result = self.iloc[loc]
result.index = new_index
Expand Down Expand Up @@ -4898,7 +4897,12 @@ def describe_numeric_1d(series):
formatted_percentiles + ['max'])
d = ([series.count(), series.mean(), series.std(), series.min()] +
[series.quantile(x) for x in percentiles] + [series.max()])
return pd.Series(d, index=stat_index, name=series.name)
if isinstance(self, ABCSeries):
return self._constructor(
d, index=stat_index, name=series.name)
else:
return self._constructor_sliced(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't make much sense. You would need more axes here and/or different meta-data. Why are you doing this?

d, index=stat_index, name=series.name)

def describe_categorical_1d(data):
names = ['count', 'unique']
Expand All @@ -4918,7 +4922,12 @@ def describe_categorical_1d(data):
names += ['top', 'freq']
result += [top, freq]

return pd.Series(result, index=names, name=data.name)
if isinstance(self, ABCSeries):
return self._constructor(
result, index=names, name=data.name)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here (e.g. I can buy ._constructor(...)), why do you need the other branch

return self._constructor_sliced(
result, index=names, name=data.name)

def describe_1d(data):
if com.is_bool_dtype(data):
Expand Down Expand Up @@ -4957,7 +4966,7 @@ def describe_1d(data):

d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)
d.columns = data.columns.copy()
return d
return self._constructor(d)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this results in additional construction overhead. Better to add some logic to concat to enable this

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I had already added the logic to concat, so the constructor was unnecessary here. I have removed it.


def _check_percentile(self, q):
"""Validate percentiles (used by describe and quantile)."""
Expand Down
88 changes: 73 additions & 15 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@
is_categorical_dtype, _values_from_object,
is_datetime_or_timedelta_dtype, is_bool,
is_bool_dtype, AbstractMethodError,
_maybe_fill)
_maybe_fill, ABCSeries)
from pandas.core.config import option_context, is_callable

from pandas.sparse.frame import SparseDataFrame

import pandas.lib as lib
from pandas.lib import Timestamp
import pandas.tslib as tslib
Expand Down Expand Up @@ -340,6 +343,16 @@ def __init__(self, obj, keys=None, axis=0, level=None,
if axis != 0:
raise ValueError('as_index=False only valid for axis=0')

self.inputconstructor = obj._constructor
if isinstance(obj, ABCSeries):
self.inputconstructor_sliced = obj._constructor
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is crazy? what is going on here?

else:
self.inputconstructor_sliced = obj._constructor_sliced
if isinstance(obj, Panel):
self.inputconstructor_expanddim = obj._constructor
else:
self.inputconstructor_expanddim = obj._constructor_expanddim
self.lenshape = obj.ndim
self.as_index = as_index
self.keys = keys
self.sort = sort
Expand Down Expand Up @@ -393,6 +406,38 @@ def indices(self):
self._assure_grouper()
return self.grouper.indices

@property
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all of these necessary? Using self.obj._constructor... looks clearer.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I introduced the properties because of a comment above.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand @jreback's point is something like below. Setters and assignments are not needed.

@property def inputconstructor(self): return self.obj._constructor 

In addition, my opinion is property is not very beneficial because it doesn't simplify variable names. @jreback?

def inputconstructor(self):
return self._inputconstructor

@inputconstructor.setter
def inputconstructor(self, constructor):
self._inputconstructor = constructor

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this naming schema is not good.

something more along the lines of _1d_constructor, _2d_constructor is much better

@property
def inputconstructor_sliced(self):
return self._inputconstructor_sliced

@inputconstructor_sliced.setter
def inputconstructor_sliced(self, constructor):
self._inputconstructor_sliced = constructor
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you saving this?


@property
def inputconstructor_expanddim(self):
return self._inputconstructor_expanddim

@inputconstructor_expanddim.setter
def inputconstructor_expanddim(self, constructor):
self._inputconstructor_expanddim = constructor

@property
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what the heck is this?

def lenshape(self):
return self._lenshape

@lenshape.setter
def lenshape(self, lenshape_in):
self._lenshape = lenshape_in

def _get_indices(self, names):
"""
safe get multiple indices, translate keys for
Expand Down Expand Up @@ -1328,7 +1373,7 @@ def cumcount(self, ascending=True):

index = self._selected_obj.index
cumcounts = self._cumcount_array(ascending=ascending)
return Series(cumcounts, index)
return self.inputconstructor_sliced(cumcounts, index)

@Substitution(name='groupby')
@Appender(_doc_template)
Expand Down Expand Up @@ -2603,7 +2648,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
result = self._aggregate_named(func_or_funcs, *args, **kwargs)

index = Index(sorted(result), name=self.grouper.names[0])
ret = Series(result, index=index)
ret = self.inputconstructor(result, index=index)

if not self.as_index: # pragma: no cover
print('Warning, ignoring as_index=True')
Expand Down Expand Up @@ -2659,19 +2704,25 @@ def _aggregate_multiple_funcs(self, arg, _level):
if _level:
return results
return list(compat.itervalues(results))[0]
return DataFrame(results, columns=columns)

if self.lenshape == 2:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how is this ever not 2d?

return self.inputconstructor(results, columns=columns)
elif self.lenshape == 1:
return self.inputconstructor_expanddim(results, columns=columns)

def _wrap_output(self, output, index, names=None):
""" common agg/transform wrapping logic """
output = output[self.name]

if names is not None:
return DataFrame(output, index=index, columns=names)
return self.inputconstructor_expanddim(
output, index=index, columns=names)
else:
name = self.name
if name is None:
name = self._selected_obj.name
return Series(output, index=index, name=name)
return self.inputconstructor(
output, index=index, name=name)

def _wrap_aggregated_output(self, output, names=None):
return self._wrap_output(output=output,
Expand Down Expand Up @@ -2873,6 +2924,7 @@ def nunique(self, dropna=True):
inc[idx] = 1

out = np.add.reduceat(inc, idx).astype('int64', copy=False)

res = out if ids[0] != -1 else out[1:]
ri = self.grouper.result_index

Expand All @@ -2881,9 +2933,7 @@ def nunique(self, dropna=True):
res, out = np.zeros(len(ri), dtype=out.dtype), res
res[ids] = out

return Series(res,
index=ri,
name=self.name)
return self.inputconstructor_sliced(res, index=ri, name=self.name)

@deprecate_kwarg('take_last', 'keep',
mapping={True: 'last', False: 'first'})
Expand Down Expand Up @@ -3024,10 +3074,9 @@ def count(self):
ids = com._ensure_platform_int(ids)
out = np.bincount(ids[mask], minlength=ngroups or None)

return Series(out,
index=self.grouper.result_index,
name=self.name,
dtype='int64')
return self.inputconstructor_sliced(
out, index=self.grouper.result_index,
name=self.name, dtype='int64')

def _apply_to_column_groupbys(self, func):
""" return a pass thru """
Expand Down Expand Up @@ -3141,7 +3190,11 @@ def aggregate(self, arg, *args, **kwargs):
result.columns.levels[0],
name=self._selected_obj.columns.name)
except:
result = self._aggregate_generic(arg, *args, **kwargs)
if self.lenshape == 2:
result = self.inputconstructor(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure what you are doing here

self._aggregate_generic(arg, *args, **kwargs))
else:
result = self._aggregate_generic(arg, *args, **kwargs)

if not self.as_index:
self._insert_inaxis_grouper_inplace(result)
Expand Down Expand Up @@ -3719,7 +3772,12 @@ def _wrap_agged_blocks(self, items, blocks):
if self.axis == 1:
result = result.T

return self._reindex_output(result)._convert(datetime=True)
if isinstance(self.inputconstructor(), SparseDataFrame):
return DataFrame(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again this is a completely crazy thing. doing this kind of introspection is so fraught with peril and should simply not be done here. Yes maybe 1 function to do it is fine.

self._reindex_output(result)._convert(datetime=True))
else:
return self.inputconstructor(
self._reindex_output(result)._convert(datetime=True))

def _reindex_output(self, result):
"""
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,13 @@ def wrapper(self, other, axis=None):
# always return a full value series here
res = _values_from_object(res)

res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
from pandas.sparse.api import SparseSeries
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are changing this completely here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain what you mean? The way I understand this, wrapper always returns a Series even if the input is a SparseSeries. I would like wrapper to be able to return a SubclassedSeries if the input is a SubclassedSeries, but the return type would still have to be a Series if the input is a SparseSeries.

if isinstance(self, SparseSeries):
res = pd.Series(
res, index=self.index, name=self.name, dtype='bool')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment

else:
res = self._constructor(
res, index=self.index, name=self.name, dtype='bool')
return res

return wrapper
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class _Unstacker(object):
"""

def __init__(self, values, index, level=-1, value_columns=None,
fill_value=None):
fill_value=None, return_type=DataFrame):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

???


self.is_categorical = None
if values.ndim == 1:
Expand All @@ -74,6 +74,7 @@ def __init__(self, values, index, level=-1, value_columns=None,
self.values = values
self.value_columns = value_columns
self.fill_value = fill_value
self.return_type = return_type

if value_columns is None and values.shape[1] != 1: # pragma: no cover
raise ValueError('must pass column labels for multi-column data')
Expand Down Expand Up @@ -169,7 +170,7 @@ def get_result(self):
ordered=ordered)
for i in range(values.shape[-1])]

return DataFrame(values, index=index, columns=columns)
return self.return_type(values, index=index, columns=columns)

def get_new_values(self):
values = self.values
Expand Down Expand Up @@ -330,8 +331,9 @@ def pivot(self, index=None, columns=None, values=None):
index = self.index
else:
index = self[index]
indexed = Series(self[values].values,
index=MultiIndex.from_arrays([index, self[columns]]))
indexed = self._constructor_sliced(
self[values].values,
index=MultiIndex.from_arrays([index, self[columns]]))
return indexed.unstack(columns)


Expand Down Expand Up @@ -407,7 +409,8 @@ def unstack(obj, level, fill_value=None):
return obj.T.stack(dropna=False)
else:
unstacker = _Unstacker(obj.values, obj.index, level=level,
fill_value=fill_value)
fill_value=fill_value,
return_type=obj._constructor_expanddim)
return unstacker.get_result()


Expand Down Expand Up @@ -439,8 +442,8 @@ def _unstack_frame(obj, level, fill_value=None):
newb = make_block(new_values.T, placement=new_placement)
new_blocks.append(newb)

result = DataFrame(BlockManager(new_blocks, new_axes))
mask_frame = DataFrame(BlockManager(mask_blocks, new_axes))
result = obj._constructor(BlockManager(new_blocks, new_axes))
mask_frame = obj._constructor(BlockManager(mask_blocks, new_axes))
return result.ix[:, mask_frame.sum(0) > 0]
else:
unstacker = _Unstacker(obj.values, obj.index, level=level,
Expand Down Expand Up @@ -509,7 +512,7 @@ def factorize(index):
mask = notnull(new_values)
new_values = new_values[mask]
new_index = new_index[mask]
return Series(new_values, index=new_index)
return frame._constructor_sliced(new_values, index=new_index)


def stack_multiple(frame, level, dropna=True):
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,10 @@ def test_count(self):
expected = Series(0, index=[])
tm.assert_series_equal(result, expected)

def test_count_subclassing(self):
df = tm.SubclassedDataFrame(data=[[1, 2, np.nan], [2, 3, 4]])
tm.assertIsInstance(df.count(), tm.SubclassedSeries)

def test_sum(self):
self._check_stat_op('sum', np.sum, has_numeric_only=True)

Expand All @@ -349,6 +353,10 @@ def test_sum(self):
has_numeric_only=True, check_dtype=False,
check_less_precise=True)

def test_sum_subclassing(self):
df = tm.SubclassedDataFrame(data=[[1, 2, 3], [2, 3, 4]])
tm.assertIsInstance(df.sum(), tm.SubclassedSeries)

def test_stat_operators_attempt_obj_array(self):
data = {
'a': [-0.00049987540199591344, -0.0016467257772919831,
Expand Down
Loading