Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ including other versions of pandas.

Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :func:`merge` when merging on a PyArrow-backed string index (:issue:`54894`)
- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_211.bug_fixes:
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2437,8 +2437,12 @@ def _factorize_keys(
length = len(dc.dictionary)

llab, rlab, count = (
pc.fill_null(dc.indices[slice(len_lk)], length).to_numpy(),
pc.fill_null(dc.indices[slice(len_lk, None)], length).to_numpy(),
pc.fill_null(dc.indices[slice(len_lk)], length)
.to_numpy()
.astype(np.intp, copy=False),
pc.fill_null(dc.indices[slice(len_lk, None)], length)
.to_numpy()
.astype(np.intp, copy=False),
len(dc.dictionary),
)
if how == "right":
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2947,3 +2947,15 @@ def test_merge_ea_int_and_float_numpy():

result = df2.merge(df1)
tm.assert_frame_equal(result, expected.astype("float64"))


def test_merge_arrow_string_index():
    # GH#54894
    # Regression test: left-merge a "string[pyarrow]" column against a
    # "string[pyarrow]" index and compare against the expected frame.
    pytest.importorskip("pyarrow")

    # Left side carries the join key as a regular column ...
    left = DataFrame({"a": ["a", "b"]}, dtype="string[pyarrow]")
    # ... while the right side carries it as the index. Only "a" appears on
    # both sides; "c" has no partner in ``left``.
    right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype="string[pyarrow]"))

    result = left.merge(right, left_on="a", right_index=True, how="left")

    # how="left" keeps both left rows; the unmatched key "b" gets NaN in the
    # column brought over from ``right``.
    expected = DataFrame(
        {"a": Series(["a", "b"], dtype="string[pyarrow]"), "b": [1, np.nan]}
    )
    tm.assert_frame_equal(result, expected)