ENH: rolling rank - ascending flag

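Add the `ascending` flag to rolling `rank`, remove dead code (the commented-out `skiplist_max_rank` and the unsupported `first`/`dense` rank types), and expand the tests and the asv benchmark to cover `ascending`, `method`, and NaN input.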
pandas-dev · jreback · Sep 14, 2021 · Aug 31, 2021 · Aug 31, 2021 · Aug 31, 2021
commit b135f1e0b1e8ef3e5aecf937bf050c4b35f3f96a
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -186,16 +186,18 @@ class Rank:
  [10, 1000],
  ["int", "float"],
  [True, False],
+ [True, False],
+ ["min", "max", "average"],
  )
- param_names = ["constructor", "window", "dtype", "percentile"]
+ param_names = ["constructor", "window", "dtype", "percentile", "ascending", "method"]
 
- def setup(self, constructor, window, dtype, percentile):
+ def setup(self, constructor, window, dtype, percentile, ascending, method):
  N = 10 ** 5
  arr = np.random.random(N).astype(dtype)
  self.roll = getattr(pd, constructor)(arr).rolling(window)
 
- def time_rank(self, constructor, window, dtype, percentile):
- self.roll.rank(percentile)
+ def time_rank(self, constructor, window, dtype, percentile, ascending, method):
+ self.roll.rank(pct=percentile, ascending=ascending, method=method)
 
 
 class PeakMemFixedWindowMinMax:

diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h
@@ -195,21 +195,6 @@ PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) {
  return rank + 1;
 }
 
-/*PANDAS_INLINE int skiplist_max_rank(skiplist_t *skp, double value) {
- node_t *node;
- int level, rank = 0;
-
- node = skp->head;
- for (level = skp->maxlevels - 1; level >= 0; --level) {
- while (_node_cmp(node->next[level], value) >= 0) {
- rank += node->width[level];
- node = node->next[level];
- }
- }
-
- return rank;
-}*/
-
 // Returns the rank of the inserted element. When there are duplicates, `rank` is the highest of
 // the group, i.e. the 'max' method of
 // https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html

diff --git a/pandas/_libs/window/aggregations.pyi b/pandas/_libs/window/aggregations.pyi
@@ -69,7 +69,8 @@ def roll_rank(
  end: np.ndarray,
  minp: int,
  percentile: bool,
- method: Literal["average", "min", "max", "first", "dense"],
+ method: Literal["average", "min", "max"],
+ ascending: bool,
 ) -> np.ndarray: ... # np.ndarray[float]
 def roll_apply(
  obj: object,

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -52,7 +52,6 @@ cdef extern from "../src/skiplist.h":
  int skiplist_remove(skiplist_t*, double) nogil
  int skiplist_rank(skiplist_t*, double) nogil
  int skiplist_min_rank(skiplist_t*, double) nogil
- int skiplist_max_rank(skiplist_t*, double) nogil
 
 cdef:
  float32_t MINfloat32 = np.NINF
@@ -1154,13 +1153,11 @@ rank_types = {
  'average': AVERAGE,
  'min': MIN,
  'max': MAX,
- 'first': FIRST,
- 'dense': DENSE,
 }
 
 
 def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
- ndarray[int64_t] end, int64_t minp, bint percentile, str method) -> np.ndarray:
+ ndarray[int64_t] end, int64_t minp, bint percentile, str method, bint ascending) -> np.ndarray:
  """
  O(N log(window)) implementation using skip list
 
@@ -1208,7 +1205,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
 
  # setup
  for j in range(s, e):
- val = values[j]
+ val = values[j] if ascending else -values[j]
  if notnan(val):
  nobs += 1
  rank = skiplist_insert(skiplist, val)
@@ -1223,14 +1220,14 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
  else:
  # calculate deletes
  for j in range(start[i - 1], s):
- val = values[j]
+ val = values[j] if ascending else -values[j]
  if notnan(val):
  skiplist_remove(skiplist, val)
  nobs -= 1
 
  # calculate adds
  for j in range(end[i - 1], e):
- val = values[j]
+ val = values[j] if ascending else -values[j]
  if notnan(val):
  nobs += 1
  rank = skiplist_insert(skiplist, val)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -1409,11 +1409,12 @@ def quantile(self, quantile: float, interpolation: str = "linear", **kwargs):
 
  return self._apply(window_func, name="quantile", **kwargs)
 
- def rank(self, pct: bool = False, method: str = "average", **kwargs):
+ def rank(self, pct: bool = False, method: str = "average", ascending: bool = True, **kwargs):
  window_func = partial(
  window_aggregations.roll_rank,
  percentile=pct,
  method=method,
+ ascending=ascending,
  )
 
  return self._apply(window_func, name="rank", **kwargs)

diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
@@ -1505,15 +1505,18 @@ def test_rolling_numeric_dtypes():
 @pytest.mark.parametrize("window", [1, 3, 10, 50, 1000])
 @pytest.mark.parametrize("method", ["min", "max", "average"])
 @pytest.mark.parametrize("pct", [True, False])
-@pytest.mark.parametrize("dups", [True, False])
-def test_rank(window, method, pct, dups):
+@pytest.mark.parametrize("ascending", [True, False])
+@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
+def test_rank(window, method, pct, ascending, test_data):
  length = 1000
- if dups:
- ser = Series(data=np.random.choice(3, length))
- else:
+ if test_data == "default":
  ser = Series(data=np.random.rand(length))
+ elif test_data == "duplicates":
+ ser = Series(data=np.random.choice(3, length))
+ elif test_data == "nans":
+ ser = Series(data=np.random.choice([1.0, 0.25, 0.75, np.nan], length))
 
- expected = ser.rolling(window).apply(lambda x: x.rank(method=method, pct=pct).iloc[-1])
- result = ser.rolling(window).rank(method=method, pct=pct)
+ expected = ser.rolling(window).apply(lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1])
+ result = ser.rolling(window).rank(method=method, pct=pct, ascending=ascending)
 
  tm.assert_series_equal(result, expected)