Skip to content
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ Fixed Regressions
- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
- Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`).
- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`)
- Fixed regression in :meth:`~DataFrame.reindex` and :meth:`~DataFrame.groupby`
with a MultiIndex or multiple keys that contain categorical datetime-like values (:issue:`21390`).
- Fixed regression in unary negative operations with object dtype (:issue:`21380`)
- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`)

Expand Down
26 changes: 12 additions & 14 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from pandas.compat.numpy import function as nv
from pandas import compat

from pandas.core.dtypes.dtypes import (
ExtensionDtype, PandasExtensionDtype)
from pandas.core.dtypes.common import (
_ensure_int64,
_ensure_platform_int,
Expand Down Expand Up @@ -807,20 +809,16 @@ def values(self):
return self._tuples

values = []
for lev, lab in zip(self.levels, self.labels):
# Need to box timestamps, etc.
box = hasattr(lev, '_box_values')
# Try to minimize boxing.
if box and len(lev) > len(lab):
taken = lev._box_values(algos.take_1d(lev._ndarray_values,
lab))
elif box:
taken = algos.take_1d(lev._box_values(lev._ndarray_values),
lab,
fill_value=lev._na_value)
else:
taken = algos.take_1d(np.asarray(lev._values), lab)
values.append(taken)

for i in range(self.nlevels):
vals = self._get_level_values(i)
if is_categorical_dtype(vals):
vals = vals.get_values()
if (isinstance(vals.dtype, (PandasExtensionDtype, ExtensionDtype))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

elif here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I need the 'if' here because the result of categorical.get_values() can still be an Index with extension dtype / datetime dtype.

I would like to explore a bit more how to streamline the path from series/index/array object -> numpy array that is boxed if needed (currently that doesn't seem too easy, and is handled again in many different places), but that is for another PR

or hasattr(vals, '_box_values')):
vals = vals.astype(object)
vals = np.array(vals, copy=False)
values.append(vals)

self._tuples = lib.fast_zip(values)
return self._tuples
Expand Down
15 changes: 14 additions & 1 deletion pandas/tests/frame/test_axis_select_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np

from pandas.compat import lrange, lzip, u
from pandas import (compat, DataFrame, Series, Index, MultiIndex,
from pandas import (compat, DataFrame, Series, Index, MultiIndex, Categorical,
date_range, isna)
import pandas as pd

Expand Down Expand Up @@ -1129,6 +1129,19 @@ def test_reindex_multi(self):

assert_frame_equal(result, expected)

def test_reindex_multi_categorical_time(self):
    # Regression test for
    # https://github.com/pandas-dev/pandas/issues/21390
    # Reindexing against a MultiIndex whose second level is a categorical
    # of datetimes.
    letters = Categorical(['a', 'b', 'c'])
    hours = Categorical(date_range("2012-01-01", periods=3, freq='H'))
    midx = pd.MultiIndex.from_product([letters, hours])

    df = pd.DataFrame({'a': range(len(midx))}, index=midx)
    # Leave out position 7 so that reindex has one row to fill with NaN.
    df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]]

    expected = pd.DataFrame(
        {'a': [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
    result = df2.reindex(midx)
    assert_frame_equal(result, expected)

data = [[1, 2, 3], [1, 2, 3]]

@pytest.mark.parametrize('actual', [
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,3 +850,23 @@ def test_empty_prod():
result = df.groupby("A", observed=False).B.prod(min_count=1)
expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
tm.assert_series_equal(result, expected)


def test_groupby_multiindex_categorical_datetime():
    # Regression test for
    # https://github.com/pandas-dev/pandas/issues/21390
    # Group by two categorical keys, one of which holds datetimes, and
    # compare against an index built from the full product of categories.
    minutes = pd.date_range('2018-06-01 00', freq='1T', periods=3)
    frame = pd.DataFrame({
        'key1': pd.Categorical(list('abcbabcba')),
        'key2': pd.Categorical(list(minutes) * 3),
        'values': np.arange(9),
    })

    expected_index = pd.MultiIndex.from_product(
        [pd.Categorical(['a', 'b', 'c']), pd.Categorical(minutes)],
        names=['key1', 'key2'])
    # The pair ('c', second timestamp) never occurs in the input frame,
    # hence the single NaN in the expected means.
    expected = pd.DataFrame(
        {'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]},
        index=expected_index)

    result = frame.groupby(['key1', 'key2']).mean()
    assert_frame_equal(result, expected)
12 changes: 10 additions & 2 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

import pandas as pd

from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex,
compat, date_range, period_range)
from pandas import (CategoricalIndex, Categorical, DataFrame, Index,
MultiIndex, compat, date_range, period_range)
from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY
from pandas.errors import PerformanceWarning, UnsortedIndexError
from pandas.core.dtypes.dtypes import CategoricalDtype
Expand Down Expand Up @@ -1596,6 +1596,14 @@ def test_get_indexer_nearest(self):
with pytest.raises(NotImplementedError):
midx.get_indexer(['a'], method='pad', tolerance=2)

def test_get_indexer_categorical_time(self):
    # Regression test for
    # https://github.com/pandas-dev/pandas/issues/21390
    levels = [
        Categorical(['a', 'b', 'c']),
        Categorical(date_range("2012-01-01", periods=3, freq='H')),
    ]
    midx = MultiIndex.from_product(levels)

    # Looking the index up in itself is expected to return 0..8 in order.
    expected = np.arange(9, dtype=np.intp)
    result = midx.get_indexer(midx)
    tm.assert_numpy_array_equal(result, expected)

def test_hash_collisions(self):
# non-smoke test that we don't get hash collisions

Expand Down