-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
ENH: Merge DataFrame and Series using on (GH21220) #21223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
91a3f16 64e6e66 8939fad b09ffb7 1a0e5a2 59b487d 0d14cc2 12da2ce 2d689ad 687568b 7ed9688 2ce56e7 ac9d5a1 fdf324c e896a91 69d8bda f7b333f c4c988c d099fd6 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
GH21220
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -11,7 +11,7 @@ | |
| import pandas.compat as compat | ||
| | ||
| from pandas import (Categorical, DataFrame, | ||
| Index, MultiIndex, Timedelta) | ||
| Index, MultiIndex, Timedelta, Series) | ||
| from pandas.core.arrays.categorical import _recode_for_categories | ||
| from pandas.core.frame import _merge_doc | ||
| from pandas.core.dtypes.common import ( | ||
| | @@ -492,6 +492,10 @@ def __init__(self, left, right, how='inner', on=None, | |
| left_index=False, right_index=False, sort=True, | ||
| suffixes=('_x', '_y'), copy=True, indicator=False, | ||
| validate=None): | ||
| if isinstance(left, Series): | ||
| left = left.to_frame() | ||
| if isinstance(right, Series): | ||
| right = right.to_frame() | ||
| ||
| self.left = self.orig_left = left | ||
| self.right = self.orig_right = right | ||
| self.how = how | ||
| | @@ -535,12 +539,14 @@ def __init__(self, left, right, how='inner', on=None, | |
| '{right_index}'.format(right_index=type(right_index))) | ||
| | ||
| # warn user when merging between different levels | ||
| if left.columns.nlevels != right.columns.nlevels: | ||
| msg = ('merging between different levels can give an unintended ' | ||
| 'result ({left} levels on the left, {right} on the right)' | ||
| ).format(left=left.columns.nlevels, | ||
| right=right.columns.nlevels) | ||
| warnings.warn(msg, UserWarning) | ||
| if isinstance(left, DataFrame) and isinstance(right, DataFrame): | ||
| if left.columns.nlevels != right.columns.nlevels: | ||
| msg = ('merging between different levels can give an ' | ||
| 'unintended result ({left} levels on the left, ' | ||
| '{right} on the right)' | ||
| ).format(left=left.columns.nlevels, | ||
| right=right.columns.nlevels) | ||
| warnings.warn(msg, UserWarning) | ||
| | ||
| self._validate_specification() | ||
| | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -1864,3 +1864,24 @@ def test_merge_index_types(index): | |
| OrderedDict([('left_data', [1, 2]), ('right_data', [1.0, 2.0])]), | ||
| index=index) | ||
| assert_frame_equal(result, expected) | ||
| | ||
| | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also parameterize this as much as possible. Contributor Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done | ||
| def test_merge_series(): | ||
| # GH 21220 | ||
| a = pd.DataFrame({"A": [1, 2, 3, 4]}, | ||
| index=pd.MultiIndex.from_product([['a', 'b'], [0, 1]], | ||
| names=['outer', 'inner'])) | ||
| b = pd.Series([1, 2, 3, 4], | ||
| index=pd.MultiIndex.from_product([['a', 'b'], [1, 2]], | ||
| names=['outer', 'inner']), name='B') | ||
| expected = pd.DataFrame({"A": [2, 4], "B": [1, 3]}, | ||
| index=pd.MultiIndex.from_product([['a', 'b'], [1]], | ||
| names=['outer', 'inner'])) | ||
| | ||
| # Testing current merge behvaior is as before | ||
| ||
| result = pd.merge(a, b.to_frame(), on=['outer', 'inner']) | ||
| tm.assert_frame_equal(result, expected) | ||
| | ||
| # Testing changed merge behvaior is as expected | ||
| result = pd.merge(a, b, on=['outer', 'inner']) | ||
| tm.assert_frame_equal(result, expected) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if u are actually going to do this
then you need to strip a ton of logic from below
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you clarify this comment? What logic would need to be stripped?
Currently, this just raises when you pass a Series (see line 525 a bit below)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks both - will make edits to this based on @jreback 's clarification
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nvm on the code removal, we do extension promotion in
concat, but nothing here.