Skip to content
Previous commit
Next commit
ENH: rolling rank - address pre-commit errors
  • Loading branch information
gsiano committed Sep 2, 2021
commit 6b23fc019da517e22fa3f742b822f245e00ed31a
9 changes: 8 additions & 1 deletion asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,14 @@ class Rank:
[True, False],
["min", "max", "average"],
)
param_names = ["constructor", "window", "dtype", "percentile", "ascending", "method"]
param_names = [
"constructor",
"window",
"dtype",
"percentile",
"ascending",
"method",
]

def setup(self, constructor, window, dtype, percentile, ascending, method):
N = 10 ** 5
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/src/skiplist.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) {
return rank + 1;
}

// Returns the rank of the inserted element. When there are duplicates, `rank` is the highest of
// the group, i.e. the 'max' method of
// Returns the rank of the inserted element. When there are duplicates,
// `rank` is the highest of the group, i.e. the 'max' method of
// https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html
PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) {
node_t *node, *prevnode, *newnode, *next_at_level;
Expand Down
11 changes: 8 additions & 3 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1155,7 +1155,8 @@ rank_types = {


def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
ndarray[int64_t] end, int64_t minp, bint percentile, str method, bint ascending) -> np.ndarray:
ndarray[int64_t] end, int64_t minp, bint percentile,
str method, bint ascending) -> np.ndarray:
"""
O(N log(window)) implementation using skip list

Expand Down Expand Up @@ -1211,7 +1212,9 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
raise MemoryError("skiplist_insert failed")
if rank_type == AVERAGE:
rank_min = skiplist_min_rank(skiplist, val)
rank = ((rank * (rank + 1) / 2) - ((rank_min - 1) * rank_min / 2)) / (rank - rank_min + 1)
rank = ((rank * (rank + 1) / 2)
- ((rank_min - 1) * rank_min / 2)) \
/ (rank - rank_min + 1)
elif rank_type == MIN:
rank = skiplist_min_rank(skiplist, val)

Expand All @@ -1233,7 +1236,9 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
raise MemoryError("skiplist_insert failed")
if rank_type == AVERAGE:
rank_min = skiplist_min_rank(skiplist, val)
rank = ((rank * (rank + 1) / 2) - ((rank_min - 1) * rank_min / 2)) / (rank - rank_min + 1)
rank = ((rank * (rank + 1) / 2)
- ((rank_min - 1) * rank_min / 2)) \
/ (rank - rank_min + 1)
elif rank_type == MIN:
rank = skiplist_min_rank(skiplist, val)
if nobs >= minp:
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,12 +1409,18 @@ def quantile(self, quantile: float, interpolation: str = "linear", **kwargs):

return self._apply(window_func, name="quantile", **kwargs)

def rank(self, method: str = "average", ascending: bool = True, pct: bool = False, **kwargs):
def rank(
self,
method: str = "average",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment here

ascending: bool = True,
pct: bool = False,
**kwargs,
):
window_func = partial(
window_aggregations.roll_rank,
percentile=pct,
method=method,
ascending=ascending,
percentile=pct,
)

return self._apply(window_func, name="rank", **kwargs)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1516,7 +1516,9 @@ def test_rank(window, method, pct, ascending, test_data):
elif test_data == "nans":
ser = Series(data=np.random.choice([1.0, 0.25, 0.75, np.nan], length))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above about inf


expected = ser.rolling(window).apply(lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1])
expected = ser.rolling(window).apply(
lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1]
)
result = ser.rolling(window).rank(method=method, pct=pct, ascending=ascending)

tm.assert_series_equal(result, expected)