Skip to content
Prev Previous commit
Next Next commit
ENH: rolling rank - ascending flag
Added the `ascending` flag to rolling rank, along with various cleanups, expanded tests, and an updated asv benchmark.
  • Loading branch information
gsiano committed Sep 1, 2021
commit b135f1e0b1e8ef3e5aecf937bf050c4b35f3f96a
10 changes: 6 additions & 4 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,18 @@ class Rank:
[10, 1000],
["int", "float"],
[True, False],
[True, False],
["min", "max", "average"],
)
param_names = ["constructor", "window", "dtype", "percentile"]
param_names = ["constructor", "window", "dtype", "percentile", "ascending", "method"]

def setup(self, constructor, window, dtype, percentile):
def setup(self, constructor, window, dtype, percentile, ascending, method):
N = 10 ** 5
arr = np.random.random(N).astype(dtype)
self.roll = getattr(pd, constructor)(arr).rolling(window)

def time_rank(self, constructor, window, dtype, percentile):
self.roll.rank(percentile)
def time_rank(self, constructor, window, dtype, percentile, ascending, method):
self.roll.rank(pct=percentile, ascending=ascending, method=method)


class PeakMemFixedWindowMinMax:
Expand Down
15 changes: 0 additions & 15 deletions pandas/_libs/src/skiplist.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,21 +195,6 @@ PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) {
return rank + 1;
}

/*PANDAS_INLINE int skiplist_max_rank(skiplist_t *skp, double value) {
node_t *node;
int level, rank = 0;

node = skp->head;
for (level = skp->maxlevels - 1; level >= 0; --level) {
while (_node_cmp(node->next[level], value) >= 0) {
rank += node->width[level];
node = node->next[level];
}
}

return rank;
}*/

// Returns the rank of the inserted element. When there are duplicates, `rank` is the highest of
// the group, i.e. the 'max' method of
// https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/window/aggregations.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def roll_rank(
end: np.ndarray,
minp: int,
percentile: bool,
method: Literal["average", "min", "max", "first", "dense"],
method: Literal["average", "min", "max"],
ascending: bool,
) -> np.ndarray: ... # np.ndarray[float]
def roll_apply(
obj: object,
Expand Down
11 changes: 4 additions & 7 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ cdef extern from "../src/skiplist.h":
int skiplist_remove(skiplist_t*, double) nogil
int skiplist_rank(skiplist_t*, double) nogil
int skiplist_min_rank(skiplist_t*, double) nogil
int skiplist_max_rank(skiplist_t*, double) nogil

cdef:
float32_t MINfloat32 = np.NINF
Expand Down Expand Up @@ -1154,13 +1153,11 @@ rank_types = {
'average': AVERAGE,
'min': MIN,
'max': MAX,
'first': FIRST,
'dense': DENSE,
}


def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp, bint percentile, str method) -> np.ndarray:
ndarray[int64_t] end, int64_t minp, bint percentile, str method, bint ascending) -> np.ndarray:
"""
O(N log(window)) implementation using skip list

Expand Down Expand Up @@ -1208,7 +1205,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,

# setup
for j in range(s, e):
val = values[j]
val = values[j] if ascending else -values[j]
if notnan(val):
nobs += 1
rank = skiplist_insert(skiplist, val)
Expand All @@ -1223,14 +1220,14 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
else:
# calculate deletes
for j in range(start[i - 1], s):
val = values[j]
val = values[j] if ascending else -values[j]
if notnan(val):
skiplist_remove(skiplist, val)
nobs -= 1

# calculate adds
for j in range(end[i - 1], e):
val = values[j]
val = values[j] if ascending else -values[j]
if notnan(val):
nobs += 1
rank = skiplist_insert(skiplist, val)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,11 +1409,12 @@ def quantile(self, quantile: float, interpolation: str = "linear", **kwargs):

return self._apply(window_func, name="quantile", **kwargs)

def rank(self, pct: bool = False, method: str = "average", **kwargs):
def rank(self, pct: bool = False, method: str = "average", ascending: bool = True, **kwargs):
window_func = partial(
window_aggregations.roll_rank,
percentile=pct,
method=method,
ascending=ascending,
)

return self._apply(window_func, name="rank", **kwargs)
Expand Down
17 changes: 10 additions & 7 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1505,15 +1505,18 @@ def test_rolling_numeric_dtypes():
@pytest.mark.parametrize("window", [1, 3, 10, 50, 1000])
@pytest.mark.parametrize("method", ["min", "max", "average"])
@pytest.mark.parametrize("pct", [True, False])
@pytest.mark.parametrize("dups", [True, False])
def test_rank(window, method, pct, dups):
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
def test_rank(window, method, pct, ascending, test_data):
length = 1000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same

if dups:
ser = Series(data=np.random.choice(3, length))
else:
if test_data == "default":
ser = Series(data=np.random.rand(length))
elif test_data == "duplicates":
ser = Series(data=np.random.choice(3, length))
elif test_data == "nans":
ser = Series(data=np.random.choice([1.0, 0.25, 0.75, np.nan], length))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above about inf


expected = ser.rolling(window).apply(lambda x: x.rank(method=method, pct=pct).iloc[-1])
result = ser.rolling(window).rank(method=method, pct=pct)
expected = ser.rolling(window).apply(lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1])
result = ser.rolling(window).rank(method=method, pct=pct, ascending=ascending)

tm.assert_series_equal(result, expected)