Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit (hold Shift + click to select a range)
221bd87
tests not passing but i need to rebase again
jbrockmendel May 21, 2021
1f45dbd
API: make concat_compat behave like CategoricalIndex._concat
jbrockmendel Aug 4, 2021
d68f868
Merge branch 'master' into ci-concat
jbrockmendel Nov 9, 2021
4cc59c7
Merge branch 'master' into ci-concat
jbrockmendel Nov 23, 2021
53d3aea
Merge branch 'master' into ci-concat
jbrockmendel Nov 25, 2021
329d2ba
Merge branch 'master' into ci-concat
jbrockmendel Nov 26, 2021
abdd2a8
avoid FutureWarning
jbrockmendel Nov 26, 2021
6b9a75e
Merge branch 'master' into ci-concat
jbrockmendel Dec 5, 2021
ac7debd
Merge branch 'master' into ci-concat
jbrockmendel Dec 6, 2021
94976da
Merge branch 'master' into ci-concat
jbrockmendel Dec 12, 2021
6b2c55d
Merge branch 'master' into ci-concat
jbrockmendel Dec 26, 2021
bb6aa1a
Merge branch 'master' into ci-concat
jbrockmendel Dec 27, 2021
3120e51
Merge branch 'master' into ci-concat
jbrockmendel Dec 27, 2021
84fed7a
Merge branch 'master' into ci-concat
jbrockmendel Jan 1, 2022
8e58449
Merge branch 'master' into ci-concat
jbrockmendel Jan 1, 2022
8e9f60a
Merge branch 'master' into ci-concat
jbrockmendel Jan 6, 2022
9164016
avoid append
jbrockmendel Jan 6, 2022
7ba8d2d
catch warnings
jbrockmendel Jan 6, 2022
6daccec
Merge branch 'master' into ci-concat
jbrockmendel Jan 7, 2022
75ab0a0
ArrayManager compat
jbrockmendel Jan 7, 2022
1f6ac46
Merge branch 'master' into ci-concat
jbrockmendel Jan 8, 2022
16481a0
Merge branch 'master' into ci-concat
jbrockmendel Jan 11, 2022
881f47f
Merge branch 'main' into ci-concat
jbrockmendel Jan 13, 2022
ee5dd27
Merge branch 'main' into ci-concat
jbrockmendel Jan 17, 2022
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
API: make concat_compat behave like CategoricalIndex._concat
  • Loading branch information
jbrockmendel committed Aug 4, 2021
commit 1f45dbd28fecbaded6beb5ceb740bb74e419d5c4
20 changes: 15 additions & 5 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,27 @@ def is_nonempty(x) -> bool:
if any_ea:
# we ignore axis here, as internally concatting with EAs is always
# for axis=0
if any(is_categorical_dtype(x.dtype) for x in to_concat):
first = [x for x in to_concat if is_categorical_dtype(x.dtype)][0]
from pandas import Index
cats = [x for x in to_concat if is_categorical_dtype(x.dtype)]
if len(cats):
# TODO: Ideally this shouldn't be order-dependent
first = cats[0]
from pandas import (
CategoricalIndex,
Index,
)

ci = CategoricalIndex(first)

try:
codes = np.concatenate([Index(first)._is_dtype_compat(Index(c)).codes for c in to_concat])
codes = np.concatenate(
[ci._is_dtype_compat(Index(c)).codes for c in to_concat]
)
except TypeError:
# not all to_concat elements are among our categories (or NA)
pass
else:
cat = first._from_backing_data(codes)
if first.ordered:
if all(x.dtype.ordered for x in cats):
cat = cat.as_ordered()
return cat

Expand Down
23 changes: 2 additions & 21 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,8 @@ def _is_dtype_compat(self, other) -> Categorical:
"categories must match existing categories when appending"
)

return other
# TODO: this is a lot like the non-coercing constructor
return other.astype(self.dtype, copy=False)

def equals(self, other: object) -> bool:
"""
Expand Down Expand Up @@ -546,23 +547,3 @@ def map(self, mapper):
"""
mapped = self._values.map(mapper)
return Index(mapped, name=self.name)

def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
alt = Index._concat(self, to_concat, name=name) # uses concat_compat

# if calling index is category, don't check dtype of others
try:
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
except TypeError:
# not all to_concat elements are among our categories (or NA)
from pandas.core.dtypes.concat import concat_compat

res = concat_compat(to_concat)
out = Index(res, name=name)
assert out.equals(alt)
assert out.dtype == alt.dtype
return out
else:
cat = self._data._from_backing_data(codes)
assert cat.dtype == alt.dtype
return type(self)._simple_new(cat, name=name)
5 changes: 3 additions & 2 deletions pandas/tests/indexes/categorical/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ def test_append_non_categories(self, ci):
tm.assert_index_equal(result, expected, exact=True)

def test_append_object(self, ci):
# GH#14298 - if base object is not categorical -> coerce to object
# GH#14298 - if base object and all entries are among
# categories -> cast to categorical (GH#41626)
result = Index(["c", "a"]).append(ci)
expected = Index(list("caaabbca"))
expected = Index(list("caaabbca"), dtype=ci.dtype)
tm.assert_index_equal(result, expected, exact=True)

def test_append_to_another(self):
Expand Down
43 changes: 22 additions & 21 deletions pandas/tests/reshape/concat/test_append_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,18 +511,18 @@ def test_union_categorical_same_categories_different_order(self):
tm.assert_series_equal(result, expected)

def test_concat_categorical_coercion(self):
# GH 13524
# GH 13524, GH#41626

# category + not-category => not-category
# category + not-category (but all-castable/nan) => category
s1 = Series([1, 2, np.nan], dtype="category")
s2 = Series([2, 1, 2])

exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object")
exp = Series([1, 2, np.nan, 2, 1, 2], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

# result shouldn't be affected by 1st elem dtype
exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object")
exp = Series([2, 1, 2, 1, 2, np.nan], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

Expand Down Expand Up @@ -562,31 +562,31 @@ def test_concat_categorical_coercion(self):
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

# if normal series only contains NaN-likes => not-category
# if normal series only contains NaN-likes => category (GH#41626)
s1 = Series([10, 11], dtype="category")
s2 = Series([np.nan, np.nan, np.nan])

exp = Series([10, 11, np.nan, np.nan, np.nan])
exp = Series([10, 11, np.nan, np.nan, np.nan], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

exp = Series([np.nan, np.nan, np.nan, 10, 11])
exp = Series([np.nan, np.nan, np.nan, 10, 11], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

def test_concat_categorical_3elem_coercion(self):
# GH 13524
# GH 13524, GH#41626

# mixed dtypes => not-category
# mixed dtypes, all castable to our categories => category (GH#41626)
s1 = Series([1, 2, np.nan], dtype="category")
s2 = Series([2, 1, 2], dtype="category")
s3 = Series([1, 2, 1, 2, np.nan])

exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

Expand Down Expand Up @@ -654,7 +654,7 @@ def test_concat_categorical_ordered(self):
tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp)

def test_concat_categorical_coercion_nan(self):
# GH 13524
# GH 13524, GH#41626

# some edge cases
# category + not-category => not category
Expand All @@ -665,18 +665,19 @@ def test_concat_categorical_coercion_nan(self):
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

# all elements of s2 are nan => category (GH#41626)
s1 = Series([1, np.nan], dtype="category")
s2 = Series([np.nan, np.nan])

exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
exp = Series([1, np.nan, np.nan, np.nan], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

# mixed dtype, all nan-likes => not-category
# mixed dtype, all nan-likes => category (GH#41626)
s1 = Series([np.nan, np.nan], dtype="category")
s2 = Series([np.nan, np.nan])

exp = Series([np.nan, np.nan, np.nan, np.nan])
exp = Series([np.nan, np.nan, np.nan, np.nan], dtype=s1.dtype)
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
Expand All @@ -692,7 +693,7 @@ def test_concat_categorical_coercion_nan(self):
tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

def test_concat_categorical_empty(self):
# GH 13524
# GH 13524, GH#41626

s1 = Series([], dtype="category")
s2 = Series([1, 2], dtype="category")
Expand All @@ -712,11 +713,11 @@ def test_concat_categorical_empty(self):
s1 = Series([], dtype="category")
s2 = Series([], dtype="object")

# different dtype => not-category
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)
# different dtype, but all castable (bc empty) => category (GH#41626)
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s1)
tm.assert_series_equal(s1.append(s2, ignore_index=True), s1)
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s1)
tm.assert_series_equal(s2.append(s1, ignore_index=True), s1)

s1 = Series([], dtype="category")
s2 = Series([np.nan, np.nan])
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/reshape/concat/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_concat_empty_series_timelike(self, tz, values):
("M8[ns]", np.int64, np.object_),
# categorical
("category", "category", "category"),
("category", "object", "object"),
("category", "object", "category"), # GH#41626
],
)
def test_concat_empty_series_dtypes(self, left, right, expected):
Expand Down Expand Up @@ -177,12 +177,12 @@ def test_concat_empty_series_dtypes_triple(self):
)

def test_concat_empty_series_dtype_category_with_array(self):
# GH#18515
# GH#18515, GH#41626
assert (
concat(
[Series(np.array([]), dtype="category"), Series(dtype="float64")]
).dtype
== "float64"
== "category"
)

def test_concat_empty_series_dtypes_sparse(self):
Expand Down