Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
660d2f5
Raise ValueError for non numerical join columns in merge_asof
phofl May 30, 2020
cafedcc
Delete unused imports
phofl May 30, 2020
fa1d909
Implement review requests
phofl May 30, 2020
34a5633
Run black pandas
phofl May 30, 2020
783bc60
Change error message
phofl May 31, 2020
e086864
Change whats new entry
phofl May 31, 2020
a4001c6
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl May 31, 2020
aaa3480
Change error type
phofl May 31, 2020
ec3147e
Move dtype check
phofl May 31, 2020
129eb77
Use raw string
phofl May 31, 2020
4680833
Move check again
phofl Jun 1, 2020
fb55c9f
Move dtype check to _get_merge_keys
phofl Jun 1, 2020
72fb8b4
Run black pandas
phofl Jun 1, 2020
0103974
Overwrite _maybe_coerce_merge_keys
phofl Jun 1, 2020
4cc8bdc
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Jun 3, 2020
a6048a6
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Sep 16, 2020
cdaa8ef
Move whats new entry
phofl Sep 16, 2020
47e211c
Add docs from other pr
phofl Sep 23, 2020
3129353
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Sep 23, 2020
0242f47
Run black pandas
phofl Sep 23, 2020
ef47721
Revert "Add docs from other pr"
phofl Sep 23, 2020
109c824
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Dec 29, 2020
3a66f05
Move whatsnew
phofl Dec 29, 2020
da377c7
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Jan 1, 2021
c0ce857
Move merge asof validation
phofl Jan 4, 2021
2c164c5
Remove newline
phofl Jan 4, 2021
a00eb9c
Parametrize test further
phofl Jan 4, 2021
f62a873
Merge branch 'master' of https://github.com/pandas-dev/pandas into 29130
phofl Jan 4, 2021
4782233
Add comment
phofl Jan 4, 2021
6ebb7a3
Change Error
phofl Jan 5, 2021
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Move dtype check to _get_merge_keys
  • Loading branch information
phofl committed Jun 1, 2020
commit fb55c9f094a0e3ef5cd4fb63081e98c346d42e7c
14 changes: 8 additions & 6 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1637,6 +1637,7 @@ def _get_merge_keys(self):

# note this function has side effects
(left_join_keys, right_join_keys, join_names) = super()._get_merge_keys()
len_by_cols = 0 if self.left_by is None else len(self.left_by)

# validate index types are the same
for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
Expand All @@ -1662,6 +1663,13 @@ def _get_merge_keys(self):
)
raise MergeError(msg)

if i >= len_by_cols:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, we don't want to do this.

Put your changes in the super call to `_get_merge_keys`; just do it upfront (that is what was not clear before).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you mean _maybe_coerce_merge_keys then? The dtype validation for the regular merge is done there.

The `_get_merge_keys` function I changed already belongs to `_AsOfMerge`, so I cannot override it again. It overrides the original `_get_merge_keys` function and calls `super()._get_merge_keys()` before checking whether both join columns have the same dtype.

if is_object_dtype(rk.dtype) or is_object_dtype(lk.dtype):
raise ValueError(
f"Incompatible merge [{i}] dtype, {repr(lk.dtype)} and "
f"{repr(rk.dtype)}, both sides must have numeric dtype"
)

# validate tolerance; datetime.timedelta or Timedelta if we have a DTI
if self.tolerance is not None:

Expand Down Expand Up @@ -1731,12 +1739,6 @@ def flip(xs) -> np.ndarray:
)
tolerance = self.tolerance

if is_object_dtype(right_values.dtype) or is_object_dtype(left_values.dtype):
raise ValueError(
f"Incompatible merge dtype, {repr(left_values.dtype)} and "
f"{repr(right_values.dtype)}, both sides must have numeric dtype"
)

# we require sortedness and non-null values in the join keys
if not Index(left_values).is_monotonic:
side = "left"
Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -1351,6 +1351,16 @@ def test_non_numerical_dtype(data):
right = pd.DataFrame({"x": data})
with pytest.raises(
ValueError,
match="Incompatible merge dtype, .*, both sides must have numeric dtype",
match=r"Incompatible merge \[0\] dtype, .*, both sides must have numeric dtype",
):
pd.merge_asof(left, right, on="x")


def test_some_issues():
    """Check that ``merge_asof`` rejects string join keys when ``by`` columns are used.

    Every column in both frames holds strings. The call passes separate
    ``left_on``/``right_on`` and ``left_by``/``right_by`` columns, and the
    expected ``ValueError`` message reports key index ``1``.
    """
    left_frame = pd.DataFrame(
        {"a": ["12", "13", "15"], "left_val1": ["a", "b", "c"]}
    )
    right_frame = pd.DataFrame(
        {"a": ["a", "b", "c"], "left_val": ["d", "e", "f"]}
    )
    expected_msg = (
        r"Incompatible merge \[1\] dtype, .*, both sides must have numeric dtype"
    )

    with pytest.raises(ValueError, match=expected_msg):
        pd.merge_asof(
            left_frame,
            right_frame,
            left_on="left_val1",
            right_on="a",
            left_by="a",
            right_by="left_val",
        )