pandas-dev · javadnoorb · Aug 7, 2018 · Aug 7, 2018 · Aug 7, 2018 · Aug 7, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -640,6 +640,7 @@ Plotting
 
 - Bug in :func:`DataFrame.plot.scatter` and :func:`DataFrame.plot.hexbin` caused x-axis label and ticklabels to disappear when colorbar was on in IPython inline backend (:issue:`10611`, :issue:`10678`, and :issue:`20455`)
 - Bug in plotting a Series with datetimes using :func:`matplotlib.axes.Axes.scatter` (:issue:`22039`)
+- Added the ``equal_bins`` argument to :func:`SeriesGroupBy.hist`; when ``True``, all groups share the same bin edges, computed from the overall minimum and maximum of the data (:issue:`22222`)
 
 Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -581,6 +581,16 @@ def wrapper(*args, **kwargs):
  if 'axis' not in kwargs_with_axis or \
  kwargs_with_axis['axis'] is None:
  kwargs_with_axis['axis'] = self.axis
+ if name == 'hist' and kwargs.get('equal_bins', False):
+ # GH-22222
+ bins = kwargs.get('bins')
+ if bins is None:
+ bins = 10 # use default value used in `hist_series`
+ if is_scalar(bins):
+ # share the same numpy array for all group bins
+ bins = np.linspace(self.obj.min(),
+ self.obj.max(), bins + 1)
+ kwargs['bins'] = bins
 
  def curried_with_axis(x):
  return f(x, *args, **kwargs_with_axis)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
@@ -2443,7 +2443,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
 
 def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
  xrot=None, ylabelsize=None, yrot=None, figsize=None,
- bins=10, **kwds):
+ bins=10, equal_bins=False, **kwds):
  """
  Draw histogram of the input series using matplotlib
 
@@ -2470,8 +2470,11 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
  bin edges are calculated and returned. If bins is a sequence, gives
  bin edges, including left edge of first bin and right edge of last
  bin. In this case, bins is returned unmodified.
- bins: integer, default 10
- Number of histogram bins to be used
+ equal_bins : boolean, default False
+ If True, use the overall minimum and maximum across all groups to
+ build one shared sequence of bin edges, so every group is drawn with
+ identical bins. Only has an effect when `bins` is None or an integer.
+
  `**kwds` : keywords
  To be passed to the actual plotting function
 

diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py
@@ -10,11 +10,52 @@
 import numpy as np
 
 from pandas.tests.plotting.common import TestPlotBase
+import pytest
 
 
 @td.skip_if_no_mpl
 class TestDataFrameGroupByPlots(TestPlotBase):
 
+ @pytest.mark.parametrize('equal_bins, bins, expected1, expected2', (
+ (True, 10,  # integer bins + equal_bins: both groups share edges
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4],
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4]),
+ (True, None,  # bins=None is treated as 10 bins with equal_bins
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4],
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4]),
+ (True, [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4, 3.],
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4],
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4]),
+ (False, [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4, 3.],
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4],
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4]),
+ (False, 10,  # no equal_bins: group 1 is binned over its own range
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4],
+ [-1., -0.8, -0.6, -0.4, -0.2, 0., 0.2, 0.4, 0.6, 0.8]),
+ (False, None,  # bins=None: edges again differ per group
+ [-3., -2.4, -1.8, -1.2, -0.6, 0., 0.6, 1.2, 1.8, 2.4],
+ [-1., -0.8, -0.6, -0.4, -0.2, 0., 0.2, 0.4, 0.6, 0.8])
+ ))
+ def test_hist_bins_match(self, equal_bins, bins, expected1, expected2):
+ # GH-22222: compare the left bar edges drawn for each of two groups
+ df = DataFrame(np.append(np.linspace(-3, 3, 100),
+ np.linspace(-1, 1, 100)),
+ columns=['value'])
+ df['group'] = [0] * 100 + [1] * 100
+ g = df.groupby('group')['value']
+ ax = g.hist(bins=bins, alpha=0.7, equal_bins=equal_bins)[0]
+
+ # `points` holds the left x-coordinate of every bar patch on `ax`:
+ # group 0 (values in [-3, 3]) -> points[:num_bins]
+ # group 1 (values in [-1, 1]) -> points[num_bins:]
+ num_bins = len(expected1)
+ points = np.array([patch.get_bbox().get_points()
+ for patch in ax.patches])[:, 0, 0]
+
+ tm.assert_almost_equal(points[:num_bins], np.array(expected1))
+ tm.assert_almost_equal(points[num_bins:], np.array(expected2))
+ tm.close()
+
  def test_series_groupby_plotting_nominally_works(self):
  n = 10
  weight = Series(np.random.normal(166, 20, size=n))