Skip to content

Commit 5b60546

Browse files
committed
use z-scores in rolling skew/kurt calculations
1 parent abd5333 commit 5b60546

File tree

2 files changed

+48
-10
lines changed

2 files changed

+48
-10
lines changed

pandas/algos.pyx

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,7 +1087,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y,
10871087
sum_wt = 1.
10881088
sum_wt2 = 1.
10891089
old_wt = 1.
1090-
1090+
10911091
for i from 1 <= i < N:
10921092
cur_x = input_x[i]
10931093
cur_y = input_y[i]
@@ -1117,7 +1117,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y,
11171117
elif is_observation:
11181118
mean_x = cur_x
11191119
mean_y = cur_y
1120-
1120+
11211121
if nobs >= minp:
11221122
if not bias:
11231123
numerator = sum_wt * sum_wt
@@ -1344,10 +1344,32 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
13441344
#-------------------------------------------------------------------------------
13451345
# Rolling skewness
13461346

1347-
def roll_skew(ndarray[double_t] input, int win, int minp):
1347+
@cython.boundscheck(False)
1348+
@cython.wraparound(False)
1349+
def _get_zscores(ndarray[double_t] inp):
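1350+
"""Center on the mean and scale to unit variance using only the finite entries; scaling is skipped when the standard deviation is (near) zero."""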
1351+
cdef:
1352+
ndarray[double_t] out
1353+
ndarray[np.uint8_t, ndim=1, cast=True] mask
1354+
double_t mu, sigma
1355+
1356+
mask = np.isfinite(inp)
1357+
if not mask.any():
1358+
return inp
1359+
1360+
mu = inp[mask].mean()
1361+
out = inp - mu
1362+
sigma = out[mask].std()
1363+
if sigma > 0 and not np.isclose(sigma, 0.0):
1364+
out[mask] /= sigma
1365+
1366+
return out
1367+
1368+
def roll_skew(ndarray[double_t] inp, int win, int minp):
13481369
cdef double val, prev
13491370
cdef double x = 0, xx = 0, xxx = 0
13501371
cdef Py_ssize_t nobs = 0, i
1372+
cdef ndarray[double_t] input = _get_zscores(inp)
13511373
cdef Py_ssize_t N = len(input)
13521374

13531375
cdef ndarray[double_t] output = np.empty(N, dtype=float)
@@ -1405,11 +1427,12 @@ def roll_skew(ndarray[double_t] input, int win, int minp):
14051427
# Rolling kurtosis
14061428

14071429

1408-
def roll_kurt(ndarray[double_t] input,
1430+
def roll_kurt(ndarray[double_t] inp,
14091431
int win, int minp):
14101432
cdef double val, prev
14111433
cdef double x = 0, xx = 0, xxx = 0, xxxx = 0
14121434
cdef Py_ssize_t nobs = 0, i
1435+
cdef ndarray[double_t] input = _get_zscores(inp)
14131436
cdef Py_ssize_t N = len(input)
14141437

14151438
cdef ndarray[double_t] output = np.empty(N, dtype=float)

pandas/stats/tests/test_moments.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,21 @@ def test_rolling_kurt(self):
321321
self._check_moment_func(mom.rolling_kurt,
322322
lambda x: kurtosis(x, bias=False))
323323

324+
def test_affine_invariance(self):
325+
"""
326+
rolling skew/kurt should be invariant under positive affine transformations (a*x + b with a > 0)
327+
"""
328+
329+
xs = np.random.rand(50)
330+
window = 10
331+
332+
for f in mom.rolling_skew, mom.rolling_kurt:
333+
left = f(xs, window)
334+
335+
for a, b in [(1, 100), (1, 5000), (100, 100), (100, 5000)]:
336+
right = f(a*xs + b, window)
337+
assert_almost_equal(left, right)
338+
324339
def test_fperr_robustness(self):
325340
# TODO: remove this once Python 2.5 is out of the picture
326341
if PY3:
@@ -524,7 +539,7 @@ def test_ewma(self):
524539
self.assertTrue(np.abs(result - 1) < 1e-2)
525540

526541
s = Series([1.0, 2.0, 4.0, 8.0])
527-
542+
528543
expected = Series([1.0, 1.6, 2.736842, 4.923077])
529544
for f in [lambda s: mom.ewma(s, com=2.0, adjust=True),
530545
lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False),
@@ -750,7 +765,7 @@ def _non_null_values(x):
750765

751766
for (std, var, cov) in [(std_biased, var_biased, cov_biased),
752767
(std_unbiased, var_unbiased, cov_unbiased)]:
753-
768+
754769
# check that var(x), std(x), and cov(x) are all >= 0
755770
var_x = var(x)
756771
std_x = std(x)
@@ -762,7 +777,7 @@ def _non_null_values(x):
762777

763778
# check that var(x) == cov(x, x)
764779
assert_equal(var_x, cov_x_x)
765-
780+
766781
# check that var(x) == std(x)^2
767782
assert_equal(var_x, std_x * std_x)
768783

@@ -796,7 +811,7 @@ def _non_null_values(x):
796811
cov_x_y = cov(x, y)
797812
cov_y_x = cov(y, x)
798813
assert_equal(cov_x_y, cov_y_x)
799-
814+
800815
# check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
801816
var_x_plus_y = var(x + y)
802817
var_y = var(y)
@@ -1007,7 +1022,7 @@ def test_rolling_consistency(self):
10071022
expected.iloc[:, i, j] = rolling_f(x.iloc[:, i], x.iloc[:, j],
10081023
window=window, min_periods=min_periods, center=center)
10091024
assert_panel_equal(rolling_f_result, expected)
1010-
1025+
10111026
# binary moments
10121027
def test_rolling_cov(self):
10131028
A = self.series
@@ -1432,7 +1447,7 @@ def test_expanding_corr_pairwise_diff_length(self):
14321447
assert_frame_equal(result2, expected)
14331448
assert_frame_equal(result3, expected)
14341449
assert_frame_equal(result4, expected)
1435-
1450+
14361451
def test_pairwise_stats_column_names_order(self):
14371452
# GH 7738
14381453
df1s = [DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[0,1]),

0 commit comments

Comments
 (0)