7 changes: 3 additions & 4 deletions pandas/core/apply.py
@@ -14,7 +14,7 @@
     is_list_like,
     is_sequence,
 )
-from pandas.core.dtypes.generic import ABCMultiIndex, ABCSeries
+from pandas.core.dtypes.generic import ABCSeries
 
 from pandas.core.construction import create_series_with_explicit_dtype
 
@@ -278,9 +278,8 @@ def apply_standard(self):
         if (
             self.result_type in ["reduce", None]
             and not self.dtypes.apply(is_extension_array_dtype).any()
-            # Disallow complex_internals since libreduction shortcut
-            # cannot handle MultiIndex
-            and not isinstance(self.agg_axis, ABCMultiIndex)
+            # Disallow complex_internals since libreduction shortcut raises a TypeError
+            and not self.agg_axis._has_complex_internals
         ):
 
             values = self.values
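With a default axis=0 reducing apply, the aggregation axis checked above is the frame's column index, so the guard now skips the libreduction shortcut for CategoricalIndex, IntervalIndex, PeriodIndex, or MultiIndex columns rather than MultiIndex alone. A minimal sketch of the kind of case this covers (assumed behavior on this branch; the example is not part of the PR):

    import numpy as np
    import pandas as pd

    # Columns backed by a CategoricalIndex are not plain ndarray-backed, so the
    # reducing apply below should take the pure-Python path instead of the
    # libreduction shortcut, and still return a Series indexed by the columns.
    df = pd.DataFrame(
        np.arange(9).reshape(3, 3),
        columns=pd.CategoricalIndex(list("abc")),
    )
    print(df.apply(np.sum))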
8 changes: 4 additions & 4 deletions pandas/core/groupby/ops.py
@@ -164,8 +164,8 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0):
             com.get_callable_name(f) not in base.plotting_methods
             and isinstance(splitter, FrameSplitter)
             and axis == 0
-            # apply_frame_axis0 doesn't allow MultiIndex
-            and not isinstance(sdata.index, MultiIndex)
+            # fast_apply/libreduction doesn't allow non-numpy backed indexes
+            and not sdata.index._has_complex_internals
         ):
             try:
                 result_values, mutated = splitter.fast_apply(f, group_keys)
@@ -616,8 +616,8 @@ def agg_series(self, obj: Series, func):
             # TODO: can we get a performant workaround for EAs backed by ndarray?
             return self._aggregate_series_pure_python(obj, func)
 
-        elif isinstance(obj.index, MultiIndex):
-            # MultiIndex; Pre-empt TypeError in _aggregate_series_fast
+        elif obj.index._has_complex_internals:
+            # Pre-empt TypeError in _aggregate_series_fast
             return self._aggregate_series_pure_python(obj, func)
 
         try:

Review comment from the PR author on the `elif obj.index._has_complex_internals:` line:

    This now excludes PeriodIndex, which previously worked fine since .values
    converted to a numpy array. It looks more performant to exclude PeriodIndex
    though, since we avoid the conversion to numpy:

        In [1]: import numpy as np
           ...: import pandas as pd
           ...: from string import ascii_letters
           ...:
           ...: np.random.seed(123)
           ...: group = np.random.choice(list(ascii_letters), 10**5)
           ...: value = np.random.randint(12345, size=10**5)
           ...: index = pd.period_range("2000", freq="D", periods=10**5)
           ...: df = pd.DataFrame({"group": group, "value": value}, index=index)

        In [2]: %timeit df.groupby("group").agg({"value": pd.Series.nunique})
        17.8 ms ± 48.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)  # on this branch
        95.9 ms ± 183 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)  # on master
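The benchmark from the review comment above, rewritten as a standalone script using timeit instead of IPython's %timeit (a sketch; absolute timings depend on machine and pandas build):

    import timeit
    from string import ascii_letters

    import numpy as np
    import pandas as pd

    # Same setup as the review comment's benchmark: 10**5 rows grouped by a
    # letter, with a PeriodIndex so agg_series takes the pure-Python path
    # on this branch.
    np.random.seed(123)
    n = 10 ** 5
    df = pd.DataFrame(
        {
            "group": np.random.choice(list(ascii_letters), n),
            "value": np.random.randint(12345, size=n),
        },
        index=pd.period_range("2000", freq="D", periods=n),
    )

    elapsed = timeit.timeit(
        lambda: df.groupby("group").agg({"value": pd.Series.nunique}), number=10
    )
    print(f"{elapsed / 10 * 1000:.1f} ms per loop")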
8 changes: 8 additions & 0 deletions pandas/core/indexes/base.py
@@ -4109,6 +4109,14 @@ def _assert_can_do_op(self, value):
         if not is_scalar(value):
             raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
 
+    @property
+    def _has_complex_internals(self):
+        """
+        Indicates if an index is not directly backed by a numpy array
+        """
+        # used to avoid libreduction code paths, which raise or require conversion
+        return False
+
     def _is_memory_usage_qualified(self) -> bool:
         """
         Return a boolean if we need a qualified .info display.
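A quick sketch of what the new property reports across index types on this branch (`_has_complex_internals` is private, not public API; the expected values follow from the base definition above and the overrides added in the files below):

    import pandas as pd

    for idx in [
        pd.Index([1, 2, 3]),                                        # ndarray-backed -> False
        pd.CategoricalIndex(list("abc")),                           # True
        pd.interval_range(0, 3),                                    # True
        pd.period_range("2020", periods=3, freq="D"),               # True
        pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),  # True
    ]:
        print(type(idx).__name__, idx._has_complex_internals)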
5 changes: 5 additions & 0 deletions pandas/core/indexes/category.py
@@ -378,6 +378,11 @@ def values(self):
         """ return the underlying data, which is a Categorical """
         return self._data
 
+    @property
+    def _has_complex_internals(self):
+        # used to avoid libreduction code paths, which raise or require conversion
+        return True
+
     def _wrap_setop_result(self, other, result):
         name = get_op_result_name(self, other)
         # We use _shallow_copy rather than the Index implementation
5 changes: 5 additions & 0 deletions pandas/core/indexes/interval.py
@@ -411,6 +411,11 @@ def values(self):
         """
         return self._data
 
+    @property
+    def _has_complex_internals(self):
+        # used to avoid libreduction code paths, which raise or require conversion
+        return True
+
     def __array_wrap__(self, result, context=None):
         # we don't want the superclass implementation
         return result
5 changes: 5 additions & 0 deletions pandas/core/indexes/multi.py
@@ -1346,6 +1346,11 @@ def values(self):
         self._tuples = lib.fast_zip(values)
         return self._tuples
 
+    @property
+    def _has_complex_internals(self):
+        # used to avoid libreduction code paths, which raise or require conversion
+        return True
+
     @cache_readonly
     def is_monotonic_increasing(self) -> bool:
         """
5 changes: 5 additions & 0 deletions pandas/core/indexes/period.py
@@ -255,6 +255,11 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
     def values(self):
         return np.asarray(self)
 
+    @property
+    def _has_complex_internals(self):
+        # used to avoid libreduction code paths, which raise or require conversion
+        return True
+
     def _shallow_copy(self, values=None, **kwargs):
         # TODO: simplify, figure out type of values
         if values is None:
17 changes: 17 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
@@ -360,6 +360,23 @@ def test_func_duplicates_raises():
         df.groupby("A").agg(["min", "min"])
 
 
+@pytest.mark.parametrize(
+    "index",
+    [
+        pd.CategoricalIndex(list("abc")),
+        pd.interval_range(0, 3),
+        pd.period_range("2020", periods=3, freq="D"),
+        pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
+    ],
+)
+def test_agg_index_has_complex_internals(index):
+    # GH 31223
+    df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
+    result = df.groupby("group").agg({"value": Series.nunique})
+    expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group")
+    tm.assert_frame_equal(result, expected)
+
+
 class TestNamedAggregationSeries:
     def test_series_named_agg(self):
         df = pd.Series([1, 2, 3, 4])
16 changes: 16 additions & 0 deletions pandas/tests/groupby/test_apply.py
@@ -811,3 +811,19 @@ def test_groupby_apply_datetime_result_dtypes():
         index=["observation", "color", "mood", "intensity", "score"],
     )
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [
+        pd.CategoricalIndex(list("abc")),
+        pd.interval_range(0, 3),
+        pd.period_range("2020", periods=3, freq="D"),
+        pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
+    ],
+)
+def test_apply_index_has_complex_internals(index):
+    # GH 31248
+    df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
+    result = df.groupby("group").apply(lambda x: x)
+    tm.assert_frame_equal(result, df)