Skip to content

Commit cd6edc9

Browse files
Merge remote-tracking branch 'upstream/master' into ArrowStringArray.astype
2 parents 683da0b + 59b2db1 commit cd6edc9

File tree

225 files changed

+5361
-2677
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

225 files changed

+5361
-2677
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ jobs:
6565
if: always()
6666

6767
- name: Testing docstring validation script
68-
run: pytest --capture=no --strict-markers scripts
68+
run: pytest scripts
6969
if: always()
7070

7171
- name: Running benchmarks

.github/workflows/database.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ jobs:
7878
uses: ./.github/actions/build_pandas
7979

8080
- name: Test
81-
run: pytest -m "${{ env.PATTERN }}" -n 2 --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml -s --cov=pandas --cov-report=xml pandas/tests/io
81+
run: pytest -m "${{ env.PATTERN }}" -n 2 --dist=loadfile --cov=pandas --cov-report=xml pandas/tests/io
8282
if: always()
8383

8484
- name: Build Version

.github/workflows/python-dev.yml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
name: Python Dev
2+
3+
on:
4+
push:
5+
branches:
6+
- master
7+
pull_request:
8+
branches:
9+
- master
10+
11+
jobs:
12+
build:
13+
runs-on: ubuntu-latest
14+
name: actions-310-dev
15+
timeout-minutes: 60
16+
17+
steps:
18+
- uses: actions/checkout@v2
19+
with:
20+
fetch-depth: 0
21+
22+
- name: Set up Python Dev Version
23+
uses: actions/setup-python@v2
24+
with:
25+
python-version: '3.10-dev'
26+
27+
- name: Install dependencies
28+
run: |
29+
python -m pip install --upgrade pip setuptools wheel
30+
pip install git+https://github.com/numpy/numpy.git
31+
pip install git+https://github.com/pytest-dev/pytest.git
32+
pip install git+https://github.com/nedbat/coveragepy.git
33+
pip install cython python-dateutil pytz hypothesis pytest-xdist
34+
pip list
35+
36+
- name: Build Pandas
37+
run: |
38+
python setup.py build_ext -q -j2
39+
python -m pip install -e . --no-build-isolation --no-use-pep517
40+
41+
- name: Build Version
42+
run: |
43+
python -c "import pandas; pandas.show_versions();"
44+
45+
- name: Test with pytest
46+
run: |
47+
coverage run -m pytest -m 'not slow and not network and not clipboard' pandas
48+
continue-on-error: true
49+
50+
- name: Publish test results
51+
uses: actions/upload-artifact@master
52+
with:
53+
name: Test results
54+
path: test-data.xml
55+
if: failure()
56+
57+
- name: Print skipped tests
58+
run: |
59+
python ci/print_skipped.py
60+
61+
- name: Report Coverage
62+
run: |
63+
coverage report -m
64+
65+
- name: Upload coverage to Codecov
66+
uses: codecov/codecov-action@v1
67+
with:
68+
flags: unittests
69+
name: codecov-pandas
70+
fail_ci_if_error: true

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ asv_bench/env/
104104
asv_bench/html/
105105
asv_bench/results/
106106
asv_bench/pandas/
107+
test-data.xml
107108

108109
# Documentation generated files #
109110
#################################

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ repos:
3535
exclude: ^pandas/_libs/src/(klib|headers)/
3636
args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
3737
- repo: https://gitlab.com/pycqa/flake8
38-
rev: 3.9.0
38+
rev: 3.9.1
3939
hooks:
4040
- id: flake8
4141
additional_dependencies:
@@ -75,7 +75,7 @@ repos:
7575
hooks:
7676
- id: yesqa
7777
additional_dependencies:
78-
- flake8==3.9.0
78+
- flake8==3.9.1
7979
- flake8-comprehensions==3.1.0
8080
- flake8-bugbear==21.3.2
8181
- pandas-dev-flaker==0.2.0

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ BSD 3-Clause License
33
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
44
All rights reserved.
55

6-
Copyright (c) 2011-2020, Open source contributors.
6+
Copyright (c) 2011-2021, Open source contributors.
77

88
Redistribution and use in source and binary forms, with or without
99
modification, are permitted provided that the following conditions are met:

asv_bench/benchmarks/groupby.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,34 @@ def time_frame_agg(self, dtype, method):
505505
self.df.groupby("key").agg(method)
506506

507507

508+
class CumminMax:
509+
param_names = ["dtype", "method"]
510+
params = [
511+
["float64", "int64", "Float64", "Int64"],
512+
["cummin", "cummax"],
513+
]
514+
515+
def setup(self, dtype, method):
516+
N = 500_000
517+
vals = np.random.randint(-10, 10, (N, 5))
518+
null_vals = vals.astype(float, copy=True)
519+
null_vals[::2, :] = np.nan
520+
null_vals[::3, :] = np.nan
521+
df = DataFrame(vals, columns=list("abcde"), dtype=dtype)
522+
null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
523+
keys = np.random.randint(0, 100, size=N)
524+
df["key"] = keys
525+
null_df["key"] = keys
526+
self.df = df
527+
self.null_df = null_df
528+
529+
def time_frame_transform(self, dtype, method):
530+
self.df.groupby("key").transform(method)
531+
532+
def time_frame_transform_many_nulls(self, dtype, method):
533+
self.null_df.groupby("key").transform(method)
534+
535+
508536
class RankWithTies:
509537
# GH 21237
510538
param_names = ["dtype", "tie_method"]

asv_bench/benchmarks/io/style.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@ def setup(self, cols, rows):
1717

1818
def time_apply_render(self, cols, rows):
1919
self._style_apply()
20-
self.st.render()
20+
self.st._render_html()
2121

2222
def peakmem_apply_render(self, cols, rows):
2323
self._style_apply()
24-
self.st.render()
24+
self.st._render_html()
2525

2626
def time_classes_render(self, cols, rows):
2727
self._style_classes()
28-
self.st.render()
28+
self.st._render_html()
2929

3030
def peakmem_classes_render(self, cols, rows):
3131
self._style_classes()
32-
self.st.render()
32+
self.st._render_html()
3333

3434
def _style_apply(self):
3535
def _apply_func(s):

asv_bench/benchmarks/strings.py

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -50,91 +50,126 @@ def peakmem_cat_frame_construction(self, dtype):
5050

5151

5252
class Methods:
53-
def setup(self):
54-
self.s = Series(tm.makeStringIndex(10 ** 5))
53+
params = ["str", "string", "arrow_string"]
54+
param_names = ["dtype"]
55+
56+
def setup(self, dtype):
57+
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
5558

56-
def time_center(self):
59+
try:
60+
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
61+
except ImportError:
62+
raise NotImplementedError
63+
64+
def time_center(self, dtype):
5765
self.s.str.center(100)
5866

59-
def time_count(self):
67+
def time_count(self, dtype):
6068
self.s.str.count("A")
6169

62-
def time_endswith(self):
70+
def time_endswith(self, dtype):
6371
self.s.str.endswith("A")
6472

65-
def time_extract(self):
73+
def time_extract(self, dtype):
6674
with warnings.catch_warnings(record=True):
6775
self.s.str.extract("(\\w*)A(\\w*)")
6876

69-
def time_findall(self):
77+
def time_findall(self, dtype):
7078
self.s.str.findall("[A-Z]+")
7179

72-
def time_find(self):
80+
def time_find(self, dtype):
7381
self.s.str.find("[A-Z]+")
7482

75-
def time_rfind(self):
83+
def time_rfind(self, dtype):
7684
self.s.str.rfind("[A-Z]+")
7785

78-
def time_get(self):
86+
def time_get(self, dtype):
7987
self.s.str.get(0)
8088

81-
def time_len(self):
89+
def time_len(self, dtype):
8290
self.s.str.len()
8391

84-
def time_join(self):
92+
def time_join(self, dtype):
8593
self.s.str.join(" ")
8694

87-
def time_match(self):
95+
def time_match(self, dtype):
8896
self.s.str.match("A")
8997

90-
def time_normalize(self):
98+
def time_normalize(self, dtype):
9199
self.s.str.normalize("NFC")
92100

93-
def time_pad(self):
101+
def time_pad(self, dtype):
94102
self.s.str.pad(100, side="both")
95103

96-
def time_partition(self):
104+
def time_partition(self, dtype):
97105
self.s.str.partition("A")
98106

99-
def time_rpartition(self):
107+
def time_rpartition(self, dtype):
100108
self.s.str.rpartition("A")
101109

102-
def time_replace(self):
110+
def time_replace(self, dtype):
103111
self.s.str.replace("A", "\x01\x01")
104112

105-
def time_translate(self):
113+
def time_translate(self, dtype):
106114
self.s.str.translate({"A": "\x01\x01"})
107115

108-
def time_slice(self):
116+
def time_slice(self, dtype):
109117
self.s.str.slice(5, 15, 2)
110118

111-
def time_startswith(self):
119+
def time_startswith(self, dtype):
112120
self.s.str.startswith("A")
113121

114-
def time_strip(self):
122+
def time_strip(self, dtype):
115123
self.s.str.strip("A")
116124

117-
def time_rstrip(self):
125+
def time_rstrip(self, dtype):
118126
self.s.str.rstrip("A")
119127

120-
def time_lstrip(self):
128+
def time_lstrip(self, dtype):
121129
self.s.str.lstrip("A")
122130

123-
def time_title(self):
131+
def time_title(self, dtype):
124132
self.s.str.title()
125133

126-
def time_upper(self):
134+
def time_upper(self, dtype):
127135
self.s.str.upper()
128136

129-
def time_lower(self):
137+
def time_lower(self, dtype):
130138
self.s.str.lower()
131139

132-
def time_wrap(self):
140+
def time_wrap(self, dtype):
133141
self.s.str.wrap(10)
134142

135-
def time_zfill(self):
143+
def time_zfill(self, dtype):
136144
self.s.str.zfill(10)
137145

146+
def time_isalnum(self, dtype):
147+
self.s.str.isalnum()
148+
149+
def time_isalpha(self, dtype):
150+
self.s.str.isalpha()
151+
152+
def time_isdecimal(self, dtype):
153+
self.s.str.isdecimal()
154+
155+
def time_isdigit(self, dtype):
156+
self.s.str.isdigit()
157+
158+
def time_islower(self, dtype):
159+
self.s.str.islower()
160+
161+
def time_isnumeric(self, dtype):
162+
self.s.str.isnumeric()
163+
164+
def time_isspace(self, dtype):
165+
self.s.str.isspace()
166+
167+
def time_istitle(self, dtype):
168+
self.s.str.istitle()
169+
170+
def time_isupper(self, dtype):
171+
self.s.str.isupper()
172+
138173

139174
class Repeat:
140175

@@ -178,13 +213,18 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
178213

179214
class Contains:
180215

181-
params = [True, False]
182-
param_names = ["regex"]
216+
params = (["str", "string", "arrow_string"], [True, False])
217+
param_names = ["dtype", "regex"]
218+
219+
def setup(self, dtype, regex):
220+
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
183221

184-
def setup(self, regex):
185-
self.s = Series(tm.makeStringIndex(10 ** 5))
222+
try:
223+
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
224+
except ImportError:
225+
raise NotImplementedError
186226

187-
def time_contains(self, regex):
227+
def time_contains(self, dtype, regex):
188228
self.s.str.contains("A", regex=regex)
189229

190230

0 commit comments

Comments
 (0)