-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
fix an indices bug for categorical-datetime columns #26860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
6f8fdc0 cfa0fef 806f980 27086ba 0d5385f 3820a94 51bda3a b6ba161 f833ece 650a3ec 88630ce c926c06 39f394e 182de89 7b5b370 c700c1a 5543e0d 4ae7db8 abcfaff 85b3b1a File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -305,6 +305,8 @@ def get_flattened_iterator(comp_ids, ngroups, levels, labels): | |
| | ||
| def get_indexer_dict(label_list, keys): | ||
| """ return a diction of {labels} -> {indexers} """ | ||
| # address GH 26860 | ||
| keys = [np.asarray(key) for key in keys] | ||
| | Contributor There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What are the types on `keys` here? I worry a bit about doing this on a DatetimeIndex with tz. That will emit a warning, since we're changing how we handle datetimes in `np.asarray`. Author There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Honestly, I'm not all that sure what is going into `keys`. | ||
| shape = list(map(len, keys)) | ||
| | ||
| group_index = get_group_index(label_list, shape, sort=True, xnull=True) | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -351,6 +351,98 @@ def f3(x): | |
| df2.groupby("a").apply(f3) | ||
| | ||
| | ||
| def test_groupby_indices_error(): | ||
| # GH 26860 | ||
| # Test if DataFrame Groupby builds gb.indices | ||
| dt = pd.to_datetime(['2018-01-01', '2018-02-01', '2018-03-01']) | ||
| df = pd.DataFrame({ | ||
| 'a': pd.Series(list('abc')), | ||
| 'b': pd.Series(dt, dtype='category'), | ||
| 'c': pd.Categorical.from_codes([-1, 0, 1], categories=[0, 1]) | ||
| }) | ||
| | ||
| df.groupby(['a', 'b']).indices | ||
| | ||
| | ||
| def test_groupby_indices_output(): | ||
| ||
| # GH 26860 | ||
| # Test if DataFrame Groupby builds gb.indices correctly. | ||
| | ||
| cols = [ | ||
| 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', | ||
| 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' | ||
| ] | ||
| | ||
| int_series = pd.Series([1, 2, 3]) | ||
| dt_series = pd.to_datetime(['2018Q1', '2018Q2', '2018Q3']) | ||
| df = pd.DataFrame( | ||
| data={ | ||
| 'int_series': int_series, | ||
| 'int_series_cat': int_series.astype('category'), | ||
| 'float_series': int_series.astype('float'), | ||
| 'float_series_cat': int_series.astype('float').astype('category'), | ||
| 'dt_series': dt_series, | ||
| 'dt_series_cat': dt_series.astype('category'), | ||
| 'period_series': dt_series.to_period('Q'), | ||
| 'period_series_cat': dt_series.to_period('Q').astype('category') | ||
| }, | ||
| columns=cols | ||
| ) | ||
| | ||
| from itertools import chain, combinations | ||
| | ||
| gb_cols_it = chain.from_iterable( | ||
| combinations(cols, n + 1) for n in range(len(cols)) | ||
| ) | ||
| for gb_cols in gb_cols_it: | ||
| gb_cols = list(gb_cols) | ||
| num_gb_cols = len(gb_cols) | ||
| | ||
| if num_gb_cols == 1: | ||
| s = df[gb_cols[0]] | ||
| col_vals = list(s.unique()) | ||
| | ||
| if pd.api.types.is_datetime64_any_dtype(s): | ||
| col_vals = list(map(pd.Timestamp, col_vals)) | ||
| | ||
| target = { | ||
| key: np.array([i]) | ||
| for i, key in enumerate(col_vals) | ||
| } | ||
| else: | ||
| col_vals = { | ||
| col: list(df[col].unique()) | ||
| for col in gb_cols | ||
| } | ||
| | ||
| def to_dt(elems): | ||
| elems = map(pd.Timestamp, elems) | ||
| elems = map(lambda dt: dt.to_datetime64(), elems) | ||
| elems = list(elems) | ||
| return elems | ||
alexifm marked this conversation as resolved. Outdated Show resolved Hide resolved | ||
| | ||
| for col in gb_cols: | ||
| if pd.api.types.is_datetime64_any_dtype(df[col]): | ||
| col_vals[col] = to_dt(col_vals[col]) | ||
| | ||
| elif pd.api.types.is_categorical_dtype(df[col]): | ||
| if pd.api.types.is_datetime64_any_dtype(df[col].cat.categories): | ||
| col_vals[col] = to_dt(col_vals[col]) | ||
alexifm marked this conversation as resolved. Outdated Show resolved Hide resolved | ||
| | ||
| it = zip(*(col_vals[col] for col in gb_cols)) | ||
| target = { | ||
| key: np.array([i]) | ||
| for i, key in enumerate(it) | ||
| } | ||
| | ||
| indices = df.groupby(gb_cols).indices | ||
| | ||
| assert set(target.keys()) == set(indices.keys()) | ||
| for key in target.keys(): | ||
| assert pd.core.dtypes.missing.array_equivalent( | ||
| target[key], indices[key]) | ||
| | ||
| | ||
| def test_attr_wrapper(ts): | ||
| grouped = ts.groupby(lambda x: x.weekday()) | ||
| | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is a public function. Can you rephrase this to make sense for an end user?
And can you move the release note to the 1.0.0 whatsnew?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make more sense to put it in terms of
`gb.indices`, which was where the problem originally came about?