88 TYPE_CHECKING ,
99 Any ,
1010 Hashable ,
11+ cast ,
1112 final ,
1213)
1314import warnings
1415
1516import numpy as np
1617
1718from pandas ._typing import (
19+ AnyArrayLike ,
1820 ArrayLike ,
1921 NDFrameT ,
2022 npt ,
3840import pandas .core .common as com
3941from pandas .core .frame import DataFrame
4042from pandas .core .groupby import ops
41- from pandas .core .groupby .categorical import (
42- recode_for_groupby ,
43- recode_from_groupby ,
44- )
43+ from pandas .core .groupby .categorical import CategoricalGrouper
4544from pandas .core .indexes .api import (
46- CategoricalIndex ,
4745 Index ,
4846 MultiIndex ,
4947)
@@ -461,8 +459,7 @@ class Grouping:
461459
462460 _codes : npt .NDArray [np .signedinteger ] | None = None
463461 _group_index : Index | None = None
464- _passed_categorical : bool
465- _all_grouper : Categorical | None
462+ _cat_grouper : CategoricalGrouper | None = None
466463 _index : Index
467464
468465 def __init__ (
@@ -479,16 +476,12 @@ def __init__(
479476 self .level = level
480477 self ._orig_grouper = grouper
481478 self .grouping_vector = _convert_grouper (index , grouper )
482- self ._all_grouper = None
483479 self ._index = index
484480 self ._sort = sort
485481 self .obj = obj
486- self ._observed = observed
487482 self .in_axis = in_axis
488483 self ._dropna = dropna
489484
490- self ._passed_categorical = False
491-
492485 # we have a single grouper which may be a myriad of things,
493486 # some of which are dependent on the passing in level
494487
@@ -527,13 +520,10 @@ def __init__(
527520 self .grouping_vector = Index (ng , name = newgrouper .result_index .name )
528521
529522 elif is_categorical_dtype (self .grouping_vector ):
530- # a passed Categorical
531- self ._passed_categorical = True
532-
533- self .grouping_vector , self ._all_grouper = recode_for_groupby (
523+ self ._cat_grouper = CategoricalGrouper .make (
534524 self .grouping_vector , sort , observed
535525 )
536-
526+ self . grouping_vector = self . _cat_grouper . new_grouping_vector
537527 elif not isinstance (
538528 self .grouping_vector , (Series , Index , ExtensionArray , np .ndarray )
539529 ):
@@ -631,20 +621,23 @@ def group_arraylike(self) -> ArrayLike:
631621 # _group_index is set in __init__ for MultiIndex cases
632622 return self ._group_index ._values
633623
634- elif self ._all_grouper is not None :
624+ elif (
625+ self ._cat_grouper is not None
626+ and self ._cat_grouper .original_grouping_vector is not None
627+ ):
635628 # retain dtype for categories, including unobserved ones
636629 return self .result_index ._values
637630
638- return self ._codes_and_uniques [1 ]
631+ return cast ( ArrayLike , self ._codes_and_uniques [1 ])
639632
640633 @cache_readonly
641634 def result_index (self ) -> Index :
642- # result_index retains dtype for categories, including unobserved ones,
643- # which group_index does not
644- if self . _all_grouper is not None :
645- group_idx = self . group_index
646- assert isinstance ( group_idx , CategoricalIndex )
647- return recode_from_groupby ( self ._all_grouper , self ._sort , group_idx )
635+ """
636+ result_index retains dtype for categories, including unobserved ones,
637+ which group_index does not
638+ """
639+ if self . _cat_grouper is not None :
640+ return self ._cat_grouper . result_index ( self .group_index )
648641 return self .group_index
649642
650643 @cache_readonly
@@ -657,26 +650,10 @@ def group_index(self) -> Index:
657650 return Index ._with_infer (uniques , name = self .name )
658651
659652 @cache_readonly
660- def _codes_and_uniques (self ) -> tuple [npt .NDArray [np .signedinteger ], ArrayLike ]:
661- if self ._passed_categorical :
662- # we make a CategoricalIndex out of the cat grouper
663- # preserving the categories / ordered attributes
664- cat = self .grouping_vector
665- categories = cat .categories
666-
667- if self ._observed :
668- ucodes = algorithms .unique1d (cat .codes )
669- ucodes = ucodes [ucodes != - 1 ]
670- if self ._sort or cat .ordered :
671- ucodes = np .sort (ucodes )
672- else :
673- ucodes = np .arange (len (categories ))
674-
675- uniques = Categorical .from_codes (
676- codes = ucodes , categories = categories , ordered = cat .ordered
677- )
678- return cat .codes , uniques
679-
653+ def _codes_and_uniques (self ) -> tuple [npt .NDArray [np .signedinteger ], AnyArrayLike ]:
654+ uniques : AnyArrayLike
655+ if self ._cat_grouper is not None :
656+ return self ._cat_grouper .codes_and_uniques (self .grouping_vector )
680657 elif isinstance (self .grouping_vector , ops .BaseGrouper ):
681658 # we have a list of groupers
682659 codes = self .grouping_vector .codes_info
0 commit comments