Skip to content

Commit f769ae0

Browse files
committed
Merge remote-tracking branch 'upstream/master' into perf_sparse_min_max
2 parents 2d2545b + 6549a74 commit f769ae0

File tree

19 files changed

+321
-248
lines changed

19 files changed

+321
-248
lines changed

ci/azure/posix.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,16 @@ jobs:
99
strategy:
1010
matrix:
1111
${{ if eq(parameters.name, 'macOS') }}:
12-
py38_macos:
12+
py38_macos_1:
1313
ENV_FILE: ci/deps/azure-macos-38.yaml
1414
CONDA_PY: "38"
1515
PATTERN: "not slow and not network"
16+
PYTEST_TARGET: "pandas/tests/[a-h]*"
17+
py38_macos_2:
18+
ENV_FILE: ci/deps/azure-macos-38.yaml
19+
CONDA_PY: "38"
20+
PATTERN: "not slow and not network"
21+
PYTEST_TARGET: "pandas/tests/[i-z]*"
1622

1723
steps:
1824
- script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'

ci/azure/windows.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,28 @@ jobs:
1313
CONDA_PY: "38"
1414
PATTERN: "not slow and not network"
1515
PYTEST_WORKERS: 2 # GH-42236
16-
PYTEST_TARGET: "pandas/tests/[a-i]*"
16+
PYTEST_TARGET: "pandas/tests/[a-h]*"
1717

1818
py38_np18_2:
1919
ENV_FILE: ci/deps/azure-windows-38.yaml
2020
CONDA_PY: "38"
2121
PATTERN: "not slow and not network"
2222
PYTEST_WORKERS: 2 # GH-42236
23-
PYTEST_TARGET: "pandas/tests/[j-z]*"
23+
PYTEST_TARGET: "pandas/tests/[i-z]*"
2424

2525
py39_1:
2626
ENV_FILE: ci/deps/azure-windows-39.yaml
2727
CONDA_PY: "39"
2828
PATTERN: "not slow and not network and not high_memory"
2929
PYTEST_WORKERS: 2 # GH-42236
30-
PYTEST_TARGET: "pandas/tests/[a-i]*"
30+
PYTEST_TARGET: "pandas/tests/[a-h]*"
3131

3232
py39_2:
3333
ENV_FILE: ci/deps/azure-windows-39.yaml
3434
CONDA_PY: "39"
3535
PATTERN: "not slow and not network and not high_memory"
3636
PYTEST_WORKERS: 2 # GH-42236
37-
PYTEST_TARGET: "pandas/tests/[j-z]*"
37+
PYTEST_TARGET: "pandas/tests/[i-z]*"
3838

3939
steps:
4040
- powershell: |

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.3
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.3.4
2728
v1.3.3
2829
v1.3.2
2930
v1.3.1

doc/source/whatsnew/v1.3.3.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,4 @@ Bug fixes
5454
Contributors
5555
~~~~~~~~~~~~
5656

57-
.. contributors:: v1.3.2..v1.3.3|HEAD
57+
.. contributors:: v1.3.2..v1.3.3

doc/source/whatsnew/v1.3.4.rst

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
.. _whatsnew_134:
2+
3+
What's new in 1.3.4 (October ??, 2021)
4+
--------------------------------------
5+
6+
These are the changes in pandas 1.3.4. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_134.regressions:
14+
15+
Fixed regressions
16+
~~~~~~~~~~~~~~~~~
17+
-
18+
-
19+
20+
.. ---------------------------------------------------------------------------
21+
22+
.. _whatsnew_134.bug_fixes:
23+
24+
Bug fixes
25+
~~~~~~~~~
26+
-
27+
-
28+
29+
.. ---------------------------------------------------------------------------
30+
31+
.. _whatsnew_134.other:
32+
33+
Other
34+
~~~~~
35+
-
36+
-
37+
38+
.. ---------------------------------------------------------------------------
39+
40+
.. _whatsnew_134.contributors:
41+
42+
Contributors
43+
~~~~~~~~~~~~
44+
45+
.. contributors:: v1.3.3..v1.3.4|HEAD

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ Other Deprecations
282282
- Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
283283
- Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`)
284284
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
285+
- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
286+
-
285287

286288
.. ---------------------------------------------------------------------------
287289

pandas/_libs/groupby.pyi

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ from typing import Literal
22

33
import numpy as np
44

5+
from pandas._typing import npt
6+
57
def group_median_float64(
68
out: np.ndarray, # ndarray[float64_t, ndim=2]
79
counts: np.ndarray, # ndarray[int64_t]
@@ -34,6 +36,7 @@ def group_shift_indexer(
3436
def group_fillna_indexer(
3537
out: np.ndarray, # ndarray[intp_t]
3638
labels: np.ndarray, # ndarray[int64_t]
39+
sorted_labels: npt.NDArray[np.intp],
3740
mask: np.ndarray, # ndarray[uint8_t]
3841
direction: Literal["ffill", "bfill"],
3942
limit: int, # int64_t
@@ -84,11 +87,12 @@ def group_ohlc(
8487
min_count: int = ...,
8588
) -> None: ...
8689
def group_quantile(
87-
out: np.ndarray, # ndarray[float64_t, ndim=2]
90+
out: npt.NDArray[np.float64],
8891
values: np.ndarray, # ndarray[numeric, ndim=1]
89-
labels: np.ndarray, # ndarray[int64_t]
90-
mask: np.ndarray, # ndarray[uint8_t]
91-
qs: np.ndarray, # const float64_t[:]
92+
labels: npt.NDArray[np.intp],
93+
mask: npt.NDArray[np.uint8],
94+
sort_indexer: npt.NDArray[np.intp], # const
95+
qs: npt.NDArray[np.float64], # const
9296
interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
9397
) -> None: ...
9498
def group_last(

pandas/_libs/groupby.pyx

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ def group_shift_indexer(int64_t[::1] out, const intp_t[::1] labels,
322322
@cython.wraparound(False)
323323
@cython.boundscheck(False)
324324
def group_fillna_indexer(ndarray[intp_t] out, ndarray[intp_t] labels,
325+
ndarray[intp_t] sorted_labels,
325326
ndarray[uint8_t] mask, str direction,
326327
int64_t limit, bint dropna) -> None:
327328
"""
@@ -334,6 +335,9 @@ def group_fillna_indexer(ndarray[intp_t] out, ndarray[intp_t] labels,
334335
labels : np.ndarray[np.intp]
335336
Array containing unique label for each group, with its ordering
336337
matching up to the corresponding record in `values`.
338+
sorted_labels : np.ndarray[np.intp]
339+
Indexer that sorts `labels`, obtained by `np.argsort(labels, kind="mergesort")`; reversed if
340+
direction == "bfill"
337341
values : np.ndarray[np.uint8]
338342
Containing the truth value of each element.
339343
mask : np.ndarray[np.uint8]
@@ -349,7 +353,6 @@ def group_fillna_indexer(ndarray[intp_t] out, ndarray[intp_t] labels,
349353
"""
350354
cdef:
351355
Py_ssize_t i, N, idx
352-
intp_t[:] sorted_labels
353356
intp_t curr_fill_idx=-1
354357
int64_t filled_vals = 0
355358

@@ -358,11 +361,6 @@ def group_fillna_indexer(ndarray[intp_t] out, ndarray[intp_t] labels,
358361
# Make sure all arrays are the same size
359362
assert N == len(labels) == len(mask)
360363

361-
sorted_labels = np.argsort(labels, kind='mergesort').astype(
362-
np.intp, copy=False)
363-
if direction == 'bfill':
364-
sorted_labels = sorted_labels[::-1]
365-
366364
with nogil:
367365
for i in range(N):
368366
idx = sorted_labels[i]
@@ -774,6 +772,7 @@ def group_quantile(ndarray[float64_t, ndim=2] out,
774772
ndarray[numeric, ndim=1] values,
775773
ndarray[intp_t] labels,
776774
ndarray[uint8_t] mask,
775+
const intp_t[:] sort_indexer,
777776
const float64_t[:] qs,
778777
str interpolation) -> None:
779778
"""
@@ -787,6 +786,8 @@ def group_quantile(ndarray[float64_t, ndim=2] out,
787786
Array containing the values to apply the function against.
788787
labels : ndarray[np.intp]
789788
Array containing the unique group labels.
789+
sort_indexer : ndarray[np.intp]
790+
Indices that sort the data by labels and then by values within each label.
790791
qs : ndarray[float64_t]
791792
The quantile values to search for.
792793
interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
@@ -800,9 +801,9 @@ def group_quantile(ndarray[float64_t, ndim=2] out,
800801
Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz, k, nqs
801802
Py_ssize_t grp_start=0, idx=0
802803
intp_t lab
803-
uint8_t interp
804+
InterpolationEnumType interp
804805
float64_t q_val, q_idx, frac, val, next_val
805-
ndarray[int64_t] counts, non_na_counts, sort_arr
806+
int64_t[::1] counts, non_na_counts
806807

807808
assert values.shape[0] == N
808809

@@ -837,16 +838,6 @@ def group_quantile(ndarray[float64_t, ndim=2] out,
837838
if not mask[i]:
838839
non_na_counts[lab] += 1
839840

840-
# Get an index of values sorted by labels and then values
841-
if labels.any():
842-
# Put '-1' (NaN) labels as the last group so it does not interfere
843-
# with the calculations.
844-
labels_for_lexsort = np.where(labels == -1, labels.max() + 1, labels)
845-
else:
846-
labels_for_lexsort = labels
847-
order = (values, labels_for_lexsort)
848-
sort_arr = np.lexsort(order).astype(np.int64, copy=False)
849-
850841
with nogil:
851842
for i in range(ngroups):
852843
# Figure out how many group elements there are
@@ -864,7 +855,7 @@ def group_quantile(ndarray[float64_t, ndim=2] out,
864855
# Casting to int will intentionally truncate result
865856
idx = grp_start + <int64_t>(q_val * <float64_t>(non_na_sz - 1))
866857

867-
val = values[sort_arr[idx]]
858+
val = values[sort_indexer[idx]]
868859
# If requested quantile falls evenly on a particular index
869860
# then write that index's value out. Otherwise interpolate
870861
q_idx = q_val * (non_na_sz - 1)
@@ -873,7 +864,7 @@ def group_quantile(ndarray[float64_t, ndim=2] out,
873864
if frac == 0.0 or interp == INTERPOLATION_LOWER:
874865
out[i, k] = val
875866
else:
876-
next_val = values[sort_arr[idx + 1]]
867+
next_val = values[sort_indexer[idx + 1]]
877868
if interp == INTERPOLATION_LINEAR:
878869
out[i, k] = val + (next_val - val) * frac
879870
elif interp == INTERPOLATION_HIGHER:

pandas/_libs/internals.pyx

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -760,14 +760,24 @@ cdef class BlockManager:
760760
cdef BlockManager _get_index_slice(self, slobj):
761761
cdef:
762762
SharedBlock blk, nb
763+
BlockManager mgr
764+
ndarray blknos, blklocs
763765

764766
nbs = []
765767
for blk in self.blocks:
766768
nb = blk.getitem_block_index(slobj)
767769
nbs.append(nb)
768770

769771
new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
770-
return type(self)(tuple(nbs), new_axes, verify_integrity=False)
772+
mgr = type(self)(tuple(nbs), new_axes, verify_integrity=False)
773+
774+
# We can avoid having to rebuild blklocs/blknos
775+
blklocs = self._blklocs
776+
blknos = self._blknos
777+
if blknos is not None:
778+
mgr._blknos = blknos.copy()
779+
mgr._blklocs = blklocs.copy()
780+
return mgr
771781

772782
def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
773783

pandas/_libs/sparse.pyi

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from typing import (
2+
Sequence,
3+
TypeVar,
4+
)
5+
6+
import numpy as np
7+
8+
from pandas._typing import npt
9+
10+
SparseIndexT = TypeVar("SparseIndexT", bound="SparseIndex")
11+
12+
class SparseIndex:
13+
length: int
14+
npoints: int
15+
def __init__(self): ...
16+
@property
17+
def ngaps(self) -> int: ...
18+
@property
19+
def nbytes(self) -> int: ...
20+
def equals(self, other) -> bool: ...
21+
def lookup(self, index: int) -> np.int32: ...
22+
def lookup_array(self, indexer: npt.NDArray[np.int32]) -> npt.NDArray[np.int32]: ...
23+
def to_int_index(self) -> IntIndex: ...
24+
def to_block_index(self) -> BlockIndex: ...
25+
def intersect(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ...
26+
def make_union(self: SparseIndexT, y_: SparseIndex) -> SparseIndexT: ...
27+
28+
class IntIndex(SparseIndex):
29+
indices: npt.NDArray[np.int32]
30+
def __init__(
31+
self, length: int, indices: Sequence[int], check_integrity: bool = True
32+
): ...
33+
34+
class BlockIndex(SparseIndex):
35+
nblocks: int
36+
blocs: np.ndarray
37+
blengths: np.ndarray
38+
def __init__(self, length: int, blocs: np.ndarray, blengths: np.ndarray): ...
39+
40+
def make_mask_object_ndarray(
41+
arr: npt.NDArray[np.object_], fill_value
42+
) -> npt.NDArray[np.bool_]: ...
43+
def get_blocks(
44+
indices: npt.NDArray[np.int32],
45+
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.int32]]: ...

0 commit comments

Comments
 (0)