pandas-dev
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
Lines changed: 16 additions & 4 deletions
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
Lines changed: 52 additions & 1 deletion
@@ -17,7 +17,7 @@
 from pandas.util.decorators import cache_readonly, Appender
 import pandas.core.algorithms as algos
 import pandas.core.common as com
-from pandas.core.common import _possibly_downcast_to_dtype, notnull
+from pandas.core.common import _possibly_downcast_to_dtype, isnull, notnull
 
 import pandas.lib as lib
 import pandas.algos as _algos
@@ -1605,8 +1605,19 @@ def filter(self, func, dropna=True, *args, **kwargs):
  else:
  wrapper = lambda x: func(x, *args, **kwargs)
 
- indexers = [self.obj.index.get_indexer(group.index) \
- if wrapper(group) else [] for _ , group in self]
+ # Interpret np.nan as False.
+ def true_and_notnull(x, *args, **kwargs):
+ b = wrapper(x, *args, **kwargs)
+ return b and notnull(b)
+
+ try:
+ indexers = [self.obj.index.get_indexer(group.index) \
+ if true_and_notnull(group) else [] \
+ for _ , group in self]
+ except ValueError:
+ raise TypeError("the filter must return a boolean result")
+ except TypeError:
+ raise TypeError("the filter must return a boolean result")
 
  if len(indexers) == 0:
  filtered = self.obj.take([]) # because np.concatenate would fail
@@ -2124,7 +2135,8 @@ def add_indexer():
  add_indexer()
  else:
  if getattr(res,'ndim',None) == 1:
- if res.ravel()[0]:
+ val = res.ravel()[0]
+ if val and notnull(val):
  add_indexer()
  else:
 
 
@@ -2642,9 +2642,37 @@ def raise_if_sum_is_zero(x):
  s = pd.Series([-1,0,1,2])
  grouper = s.apply(lambda x: x % 2)
  grouped = s.groupby(grouper)
- self.assertRaises(ValueError,
+ self.assertRaises(TypeError,
  lambda: grouped.filter(raise_if_sum_is_zero))
 
+ def test_filter_bad_shapes(self):
+ df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)})
+ s = df['B']
+ g_df = df.groupby('B')
+ g_s = s.groupby(s)
+
+ f = lambda x: x
+ self.assertRaises(TypeError, lambda: g_df.filter(f))
+ self.assertRaises(TypeError, lambda: g_s.filter(f))
+
+ f = lambda x: x == 1
+ self.assertRaises(TypeError, lambda: g_df.filter(f))
+ self.assertRaises(TypeError, lambda: g_s.filter(f))
+
+ f = lambda x: np.outer(x, x)
+ self.assertRaises(TypeError, lambda: g_df.filter(f))
+ self.assertRaises(TypeError, lambda: g_s.filter(f))
+
+ def test_filter_nan_is_false(self):
+ df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)})
+ s = df['B']
+ g_df = df.groupby(df['B'])
+ g_s = s.groupby(s)
+
+ f = lambda x: np.nan
+ assert_frame_equal(g_df.filter(f), df.loc[[]])
+ assert_series_equal(g_s.filter(f), s[[]])
+
  def test_filter_against_workaround(self):
  np.random.seed(0)
  # Series of ints
@@ -2697,6 +2725,29 @@ def test_filter_against_workaround(self):
  new_way = grouped.filter(lambda x: x['ints'].mean() > N/20)
  assert_frame_equal(new_way.sort_index(), old_way.sort_index())
 
+ def test_filter_using_len(self):
+ # BUG GH4447
+ df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)})
+ grouped = df.groupby('B')
+ actual = grouped.filter(lambda x: len(x) > 2)
+ expected = DataFrame({'A': np.arange(2, 6), 'B': list('bbbb'), 'C': np.arange(2, 6)}, index=np.arange(2, 6))
+ assert_frame_equal(actual, expected)
+
+ actual = grouped.filter(lambda x: len(x) > 4)
+ expected = df.ix[[]]
+ assert_frame_equal(actual, expected)
+
+ # Series have always worked properly, but we'll test anyway.
+ s = df['B']
+ grouped = s.groupby(s)
+ actual = grouped.filter(lambda x: len(x) > 2)
+ expected = Series(4*['b'], index=np.arange(2, 6))
+ assert_series_equal(actual, expected)
+
+ actual = grouped.filter(lambda x: len(x) > 4)
+ expected = s[[]]
+ assert_series_equal(actual, expected)
+
  def test_groupby_whitelist(self):
  from string import ascii_lowercase
  letters = np.array(list(ascii_lowercase))