pandas-dev · jorisvandenbossche · Jul 23, 2018 · May 27, 2018 · May 27, 2018 · May 28, 2018
diff --git a/doc/source/merging.rst b/doc/source/merging.rst
@@ -506,8 +506,8 @@ You can also pass a list of dicts or Series:
 
 .. _merging.join:
 
-Database-style DataFrame joining/merging
-----------------------------------------
+Database-style DataFrame or named Series joining/merging
+--------------------------------------------------------
 
 pandas has full-featured, **high performance** in-memory join operations
 idiomatically very similar to relational databases like SQL. These methods
@@ -522,7 +522,7 @@ Users who are familiar with SQL but new to pandas might be interested in a
 :ref:`comparison with SQL<compare_with_sql.join>`.
 
 pandas provides a single function, :func:`~pandas.merge`, as the entry point for 
-all standard database join operations between ``DataFrame`` objects:
+all standard database join operations between ``DataFrame`` or named ``Series`` objects:
 
 ::
 
@@ -531,40 +531,40 @@ all standard database join operations between ``DataFrame`` objects:
  suffixes=('_x', '_y'), copy=True, indicator=False,
  validate=None)
 
-* ``left``: A DataFrame object.
-* ``right``: Another DataFrame object.
+* ``left``: A DataFrame or named Series object.
+* ``right``: Another DataFrame or named Series object.
 * ``on``: Column or index level names to join on. Must be found in both the left
- and right DataFrame objects. If not passed and ``left_index`` and
+ and right DataFrame and/or Series objects. If not passed and ``left_index`` and
  ``right_index`` are ``False``, the intersection of the columns in the
- DataFrames will be inferred to be the join keys.
-* ``left_on``: Columns or index levels from the left DataFrame to use as
+ DataFrames and/or Series will be inferred to be the join keys.
+* ``left_on``: Columns or index levels from the left DataFrame or Series to use as
  keys. Can either be column names, index level names, or arrays with length
- equal to the length of the DataFrame.
-* ``right_on``: Columns or index levels from the right DataFrame to use as
+ equal to the length of the DataFrame or Series.
+* ``right_on``: Columns or index levels from the right DataFrame or Series to use as
  keys. Can either be column names, index level names, or arrays with length
- equal to the length of the DataFrame.
+ equal to the length of the DataFrame or Series.
 * ``left_index``: If ``True``, use the index (row labels) from the left
- DataFrame as its join key(s). In the case of a DataFrame with a MultiIndex
+ DataFrame or Series as its join key(s). In the case of a DataFrame or Series with a MultiIndex
  (hierarchical), the number of levels must match the number of join keys
- from the right DataFrame.
-* ``right_index``: Same usage as ``left_index`` for the right DataFrame
+ from the right DataFrame or Series.
+* ``right_index``: Same usage as ``left_index`` for the right DataFrame or Series.
 * ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults
  to ``inner``. See below for more detailed description of each method.
 * ``sort``: Sort the result DataFrame by the join keys in lexicographical
  order. Defaults to ``True``, setting to ``False`` will improve performance
  substantially in many cases.
 * ``suffixes``: A tuple of string suffixes to apply to overlapping
  columns. Defaults to ``('_x', '_y')``.
-* ``copy``: Always copy data (default ``True``) from the passed DataFrame
+* ``copy``: Always copy data (default ``True``) from the passed DataFrame or named Series
  objects, even when reindexing is not necessary. Cannot be avoided in many
  cases but may improve performance / memory usage. The cases where copying
  can be avoided are somewhat pathological but this option is provided
  nonetheless.
 * ``indicator``: Add a column to the output DataFrame called ``_merge``
  with information on the source of each row. ``_merge`` is Categorical-type
  and takes on a value of ``left_only`` for observations whose merge key
- only appears in ``'left'`` DataFrame, ``right_only`` for observations whose
- merge key only appears in ``'right'`` DataFrame, and ``both`` if the
+ only appears in ``'left'`` DataFrame or Series, ``right_only`` for observations whose
+ merge key only appears in ``'right'`` DataFrame or Series, and ``both`` if the
  observation's merge key is found in both.
 
 * ``validate`` : string, default None.
@@ -584,10 +584,10 @@ all standard database join operations between ``DataFrame`` objects:
 
  Support for specifying index levels as the ``on``, ``left_on``, and
  ``right_on`` parameters was added in version 0.23.0.
+ Support for merging named ``Series`` objects was added in version 0.24.0.
 
-The return type will be the same as ``left``. If ``left`` is a ``DataFrame``
-and ``right`` is a subclass of DataFrame, the return type will still be
-``DataFrame``.
+The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` or named ``Series``
+and ``right`` is a subclass of ``DataFrame``, the return type will still be ``DataFrame``.
 
 ``merge`` is a function in the pandas namespace, and it is also available as a
 ``DataFrame`` instance method :meth:`~DataFrame.merge`, with the calling 

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -12,6 +12,7 @@ v0.24.0 (Month XX, 2018)
 
 New features
 ~~~~~~~~~~~~
+- :func:`merge` now directly allows merging between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`)
 
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -137,16 +137,16 @@
 """
 
 _merge_doc = """
-Merge DataFrame objects by performing a database-style join operation by
-columns or indexes.
+Merge DataFrame or named Series objects by performing a database-style join
+operation by columns or indexes.
 
 If joining columns on columns, the DataFrame indexes *will be
 ignored*. Otherwise if joining indexes on indexes or indexes on a column or
 columns, the index will be passed on.
 
 Parameters
 ----------%s
-right : DataFrame, Series or dict
+right : DataFrame or named Series
  Object to merge with.
 how : {'left', 'right', 'outer', 'inner'}, default 'inner'
  Type of merge to be performed.
@@ -217,6 +217,7 @@
 -----
 Support for specifying index levels as the `on`, `left_on`, and
 `right_on` parameters was added in version 0.23.0
+Support for merging named Series objects was added in version 0.24.0
 
 See Also
 --------

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -11,7 +11,7 @@
 import pandas.compat as compat
 
 from pandas import (Categorical, DataFrame,
- Index, MultiIndex, Timedelta)
+ Index, MultiIndex, Timedelta, Series)
 from pandas.core.arrays.categorical import _recode_for_categories
 from pandas.core.frame import _merge_doc
 from pandas.core.dtypes.common import (
@@ -493,6 +493,8 @@ def __init__(self, left, right, how='inner', on=None,
  left_index=False, right_index=False, sort=True,
  suffixes=('_x', '_y'), copy=True, indicator=False,
  validate=None):
+ left = validate_operand(left)
+ right = validate_operand(right)
  self.left = self.orig_left = left
  self.right = self.orig_right = right
  self.how = how
@@ -519,13 +521,6 @@ def __init__(self, left, right, how='inner', on=None,
  raise ValueError(
  'indicator option can only accept boolean or string arguments')
 
- if not isinstance(left, DataFrame):
- raise ValueError('can not merge DataFrame with instance of '
- 'type {left}'.format(left=type(left)))
- if not isinstance(right, DataFrame):
- raise ValueError('can not merge DataFrame with instance of '
- 'type {right}'.format(right=type(right)))
-
  if not is_bool(left_index):
  raise ValueError(
  'left_index parameter must be of type bool, not '
@@ -1645,3 +1640,16 @@ def _should_fill(lname, rname):
 
 def _any(x):
  return x is not None and com._any_not_none(*x)
+
+
+def validate_operand(obj):
+ """
+ Check that a merge operand is a DataFrame or a named Series.
+
+ Parameters
+ ----------
+ obj : object
+ Candidate operand, passed as ``left`` or ``right``.
+
+ Returns
+ -------
+ DataFrame
+ ``obj`` itself when it is a DataFrame, otherwise the result of
+ ``obj.to_frame()`` for a named Series.
+
+ Raises
+ ------
+ ValueError
+ If ``obj`` is a Series whose ``name`` is None.
+ TypeError
+ If ``obj`` is neither a DataFrame nor a Series.
+ """
+ if isinstance(obj, DataFrame):
+ return obj
+ elif isinstance(obj, Series):
+ # NOTE(review): presumably the name is required because it becomes the
+ # column label of the frame built by to_frame() - confirm
+ if obj.name is None:
+ raise ValueError('Cannot merge a Series without a name')
+ else:
+ return obj.to_frame()
+ else:
+ raise TypeError('Can only merge Series or DataFrame objects, '
+ 'a {obj} was passed'.format(obj=type(obj)))
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
@@ -228,16 +228,19 @@ def test_join_on_fails_with_different_column_counts(self):
  index=tm.makeCustomIndex(10, 2))
  merge(df, df2, right_on='a', left_on=['a', 'b'])
 
- def test_join_on_fails_with_wrong_object_type(self):
- # GH12081
- wrongly_typed = [Series([0, 1]), 2, 'str', None, np.array([0, 1])]
- df = DataFrame({'a': [1, 1]})
+ @pytest.mark.parametrize("wrong_type", [2, 'str', None, np.array([0, 1])])
+ def test_join_on_fails_with_wrong_object_type(self, wrong_type):
+ # GH12081 - merging with an object of an unsupported type must raise
+
+ # GH21220 - merging a Series with a DataFrame is now allowed, so Series
+ # is no longer among the parametrized wrong types
 
- for obj in wrongly_typed:
-  with tm.assert_raises_regex(ValueError, str(type(obj))):
-  merge(obj, df, left_on='a', right_on='a')
-  with tm.assert_raises_regex(ValueError, str(type(obj))):
-  merge(df, obj, left_on='a', right_on='a')
+ df = DataFrame({'a': [1, 1]})
+ with tm.assert_raises_regex(TypeError, str(type(wrong_type))):
+ merge(wrong_type, df, left_on='a', right_on='a')
+ with tm.assert_raises_regex(TypeError, str(type(wrong_type))):
+ merge(df, wrong_type, left_on='a', right_on='a')
 
  def test_join_on_pass_vector(self):
  expected = self.target.join(self.source, on='C')

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -1887,3 +1887,33 @@ def test_merge_index_types(index):
  OrderedDict([('left_data', [1, 2]), ('right_data', [1.0, 2.0])]),
  index=index)
  assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("on,left_on,right_on,left_index,right_index,nms,nm", [
+ (['outer', 'inner'], None, None, False, False, ['outer', 'inner'], 'B'),
+ (None, None, None, True, True, ['outer', 'inner'], 'B'),
+ (None, ['outer', 'inner'], None, False, True, None, 'B'),
+ (None, None, ['outer', 'inner'], True, False, None, 'B'),
+ (['outer', 'inner'], None, None, False, False, ['outer', 'inner'], None),
+ (None, None, None, True, True, ['outer', 'inner'], None),
+ (None, ['outer', 'inner'], None, False, True, None, None),
+ (None, None, ['outer', 'inner'], True, False, None, None)])
+def test_merge_series(on, left_on, right_on, left_index, right_index, nms, nm):
+ # GH 21220
+ # Merge a DataFrame with a Series that share a two-level MultiIndex,
+ # passing the join keys through on / left_on / right_on / left_index /
+ # right_index. ``nm`` is the name given to the Series and ``nms`` the
+ # index names of the expected result.
+ # With a name the merge result is compared against ``expected``;
+ # with ``nm`` set to None a ValueError is expected instead.
+ a = pd.DataFrame({"A": [1, 2, 3, 4]},
+ index=pd.MultiIndex.from_product([['a', 'b'], [0, 1]],
+ names=['outer', 'inner']))
+ b = pd.Series([1, 2, 3, 4],
+ index=pd.MultiIndex.from_product([['a', 'b'], [1, 2]],
+ names=['outer', 'inner']), name=nm)
+ expected = pd.DataFrame({"A": [2, 4], "B": [1, 3]},
+ index=pd.MultiIndex.from_product([['a', 'b'], [1]],
+ names=nms))
+ if nm is not None:
+ result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
+ left_index=left_index, right_index=right_index)
+ tm.assert_frame_equal(result, expected)
+ else:
+ # NOTE(review): the ``result`` binding below is never read - the call
+ # is only expected to raise
+ with tm.assert_raises_regex(ValueError, 'a Series without a name'):
+ result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
+ left_index=left_index, right_index=right_index)