pandas-dev
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/database.yml b/.github/workflows/database.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml
Lines changed: 70 additions & 0 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/LICENSE b/LICENSE
Lines changed: 1 addition & 1 deletion
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
Lines changed: 28 additions & 0 deletions
diff --git a/asv_bench/benchmarks/io/style.py b/asv_bench/benchmarks/io/style.py
Lines changed: 4 additions & 4 deletions
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
Lines changed: 74 additions & 34 deletions
@@ -65,7 +65,7 @@ jobs:
  if: always()
 
  - name: Testing docstring validation script
- run: pytest --capture=no --strict-markers scripts
+ run: pytest scripts
  if: always()
 
  - name: Running benchmarks
 
@@ -78,7 +78,7 @@ jobs:
  uses: ./.github/actions/build_pandas
 
  - name: Test
- run: pytest -m "${{ env.PATTERN }}" -n 2 --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml -s --cov=pandas --cov-report=xml pandas/tests/io
+ run: pytest -m "${{ env.PATTERN }}" -n 2 --dist=loadfile --cov=pandas --cov-report=xml pandas/tests/io
  if: always()
 
  - name: Build Version
 
@@ -0,0 +1,70 @@
+name: Python Dev  # builds pandas and runs its test suite on the 3.10-dev interpreter
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    name: actions-310-dev
+    timeout-minutes: 60
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0  # 0 = fetch the full history instead of a shallow clone
+
+      - name: Set up Python Dev Version
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.10-dev'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          pip install git+https://github.com/numpy/numpy.git
+          pip install git+https://github.com/pytest-dev/pytest.git
+          pip install git+https://github.com/nedbat/coveragepy.git
+          pip install cython python-dateutil pytz hypothesis pytest-xdist
+          pip list
+
+      - name: Build Pandas
+        run: |
+          python setup.py build_ext -q -j2
+          python -m pip install -e . --no-build-isolation --no-use-pep517
+
+      - name: Build Version
+        run: |
+          python -c "import pandas; pandas.show_versions();"
+
+      - name: Test with pytest
+        run: |
+          coverage run -m pytest -m 'not slow and not network and not clipboard' pandas
+        continue-on-error: true  # a failing test run does not fail the job, so failure() stays false afterwards
+
+      - name: Publish test results
+        uses: actions/upload-artifact@v2  # pinned release tag (was the moving master branch), like the other @v2 actions here
+        with:
+          name: Test results
+          path: test-data.xml  # NOTE(review): no --junitxml on the command above; presumably set in shared pytest config - confirm
+        if: always()  # was failure(), which cannot fire for test failures because of continue-on-error above
+
+      - name: Print skipped tests
+        run: |
+          python ci/print_skipped.py
+
+      - name: Report Coverage
+        run: |
+          coverage report -m
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v1
+        with:
+          flags: unittests
+          name: codecov-pandas
+          fail_ci_if_error: true
@@ -104,6 +104,7 @@ asv_bench/env/
 asv_bench/html/
 asv_bench/results/
 asv_bench/pandas/
+test-data.xml
 
 # Documentation generated files #
 #################################
 
@@ -35,7 +35,7 @@ repos:
  exclude: ^pandas/_libs/src/(klib|headers)/
  args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
 - repo: https://gitlab.com/pycqa/flake8
- rev: 3.9.0
+ rev: 3.9.1
  hooks:
  - id: flake8
  additional_dependencies:
@@ -75,7 +75,7 @@ repos:
  hooks:
  - id: yesqa
  additional_dependencies:
- - flake8==3.9.0
+ - flake8==3.9.1
  - flake8-comprehensions==3.1.0
  - flake8-bugbear==21.3.2
  - pandas-dev-flaker==0.2.0
 
@@ -3,7 +3,7 @@ BSD 3-Clause License
 Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
 All rights reserved.
 
-Copyright (c) 2011-2020, Open source contributors.
+Copyright (c) 2011-2021, Open source contributors.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 
@@ -505,6 +505,34 @@ def time_frame_agg(self, dtype, method):
  self.df.groupby("key").agg(method)
 
 
+class CumminMax:  # benchmark: times groupby("key").transform(method) on a plain frame and on a NaN-heavy frame
+ param_names = ["dtype", "method"]
+ params = [
+ ["float64", "int64", "Float64", "Int64"],  # dtype values handed to DataFrame(..., dtype=dtype) in setup
+ ["cummin", "cummax"],  # method names handed to .transform() in the time_* methods
+ ]
+
+ def setup(self, dtype, method):  # builds self.df and self.null_df; `method` is accepted but not used here
+ N = 500_000
+ vals = np.random.randint(-10, 10, (N, 5))  # N x 5 integers drawn from [-10, 10)
+ null_vals = vals.astype(float, copy=True)  # float copy so NaN can be assigned below
+ null_vals[::2, :] = np.nan
+ null_vals[::3, :] = np.nan  # with the line above: every row whose index is a multiple of 2 or 3 is all-NaN
+ df = DataFrame(vals, columns=list("abcde"), dtype=dtype)
+ null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)  # NOTE(review): dtype="int64" cannot hold NaN; confirm what the constructor does for that parameter
+ keys = np.random.randint(0, 100, size=N)  # group labels in [0, 100), shared by both frames
+ df["key"] = keys
+ null_df["key"] = keys
+ self.df = df
+ self.null_df = null_df
+
+ def time_frame_transform(self, dtype, method):  # timed: transform on the frame built from `vals`
+ self.df.groupby("key").transform(method)
+
+ def time_frame_transform_many_nulls(self, dtype, method):  # timed: same call on the frame built from `null_vals`
+ self.null_df.groupby("key").transform(method)
+
+
 class RankWithTies:
  # GH 21237
  param_names = ["dtype", "tie_method"]
 
@@ -17,19 +17,19 @@ def setup(self, cols, rows):
 
  def time_apply_render(self, cols, rows):
  self._style_apply()
- self.st.render()
+ self.st._render_html()  # timed call: replaces the public render() removed above with the underscore-prefixed _render_html()
 
  def peakmem_apply_render(self, cols, rows):
  self._style_apply()
- self.st.render()
+ self.st._render_html()  # same call as time_apply_render; the peakmem_ prefix presumably makes asv record peak memory - confirm
 
  def time_classes_render(self, cols, rows):
  self._style_classes()
- self.st.render()
+ self.st._render_html()  # timed call after _style_classes(): replaces the public render() removed above
 
  def peakmem_classes_render(self, cols, rows):
  self._style_classes()
- self.st.render()
+ self.st._render_html()  # same call as time_classes_render; the peakmem_ prefix presumably makes asv record peak memory - confirm
 
  def _style_apply(self):
  def _apply_func(s):
 
@@ -50,91 +50,126 @@ def peakmem_cat_frame_construction(self, dtype):
 
 
 class Methods:
- def setup(self):
- self.s = Series(tm.makeStringIndex(10 ** 5))
+ params = ["str", "string", "arrow_string"]  # dtype names forwarded to Series(..., dtype=dtype) in setup
+ param_names = ["dtype"]
+
+ def setup(self, dtype):  # builds self.s, the Series every time_* method below operates on
+ from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
 
- def time_center(self):
+ try:  # only the Series construction is guarded; the import above sits outside this try
+ self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
+ except ImportError:  # NOTE(review): presumably raised when the optional Arrow backend is missing - confirm
+ raise NotImplementedError  # NOTE(review): asv appears to treat NotImplementedError in setup as "skip this parameter" - confirm
+
+ def time_center(self, dtype):
  self.s.str.center(100)
 
- def time_count(self):
+ def time_count(self, dtype):
  self.s.str.count("A")
 
- def time_endswith(self):
+ def time_endswith(self, dtype):
  self.s.str.endswith("A")
 
- def time_extract(self):
+ def time_extract(self, dtype):
  with warnings.catch_warnings(record=True):
  self.s.str.extract("(\\w*)A(\\w*)")
 
- def time_findall(self):
+ def time_findall(self, dtype):
  self.s.str.findall("[A-Z]+")
 
- def time_find(self):
+ def time_find(self, dtype):  # NOTE(review): "[A-Z]+" is presumably searched as a literal substring by str.find, not as a regex - confirm
  self.s.str.find("[A-Z]+")
 
- def time_rfind(self):
+ def time_rfind(self, dtype):  # NOTE(review): same literal-vs-regex question as time_find - confirm
  self.s.str.rfind("[A-Z]+")
 
- def time_get(self):
+ def time_get(self, dtype):
  self.s.str.get(0)
 
- def time_len(self):
+ def time_len(self, dtype):
  self.s.str.len()
 
- def time_join(self):
+ def time_join(self, dtype):
  self.s.str.join(" ")
 
- def time_match(self):
+ def time_match(self, dtype):
  self.s.str.match("A")
 
- def time_normalize(self):
+ def time_normalize(self, dtype):
  self.s.str.normalize("NFC")
 
- def time_pad(self):
+ def time_pad(self, dtype):
  self.s.str.pad(100, side="both")
 
- def time_partition(self):
+ def time_partition(self, dtype):
  self.s.str.partition("A")
 
- def time_rpartition(self):
+ def time_rpartition(self, dtype):
  self.s.str.rpartition("A")
 
- def time_replace(self):
+ def time_replace(self, dtype):
  self.s.str.replace("A", "\x01\x01")
 
- def time_translate(self):
+ def time_translate(self, dtype):  # NOTE(review): table is keyed by the string "A"; str.translate tables are normally keyed by code points - confirm it maps anything
  self.s.str.translate({"A": "\x01\x01"})
 
- def time_slice(self):
+ def time_slice(self, dtype):
  self.s.str.slice(5, 15, 2)
 
- def time_startswith(self):
+ def time_startswith(self, dtype):
  self.s.str.startswith("A")
 
- def time_strip(self):
+ def time_strip(self, dtype):
  self.s.str.strip("A")
 
- def time_rstrip(self):
+ def time_rstrip(self, dtype):
  self.s.str.rstrip("A")
 
- def time_lstrip(self):
+ def time_lstrip(self, dtype):
  self.s.str.lstrip("A")
 
- def time_title(self):
+ def time_title(self, dtype):
  self.s.str.title()
 
- def time_upper(self):
+ def time_upper(self, dtype):
  self.s.str.upper()
 
- def time_lower(self):
+ def time_lower(self, dtype):
  self.s.str.lower()
 
- def time_wrap(self):
+ def time_wrap(self, dtype):
  self.s.str.wrap(10)
 
- def time_zfill(self):
+ def time_zfill(self, dtype):
  self.s.str.zfill(10)
 
+ def time_isalnum(self, dtype):  # first of nine newly added is* predicate benchmarks; each calls one .str.is*() method on self.s
+ self.s.str.isalnum()
+
+ def time_isalpha(self, dtype):
+ self.s.str.isalpha()
+
+ def time_isdecimal(self, dtype):
+ self.s.str.isdecimal()
+
+ def time_isdigit(self, dtype):
+ self.s.str.isdigit()
+
+ def time_islower(self, dtype):
+ self.s.str.islower()
+
+ def time_isnumeric(self, dtype):
+ self.s.str.isnumeric()
+
+ def time_isspace(self, dtype):
+ self.s.str.isspace()
+
+ def time_istitle(self, dtype):
+ self.s.str.istitle()
+
+ def time_isupper(self, dtype):
+ self.s.str.isupper()
+
 
 class Repeat:
 
@@ -178,13 +213,18 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
 
 class Contains:
 
- params = [True, False]
- param_names = ["regex"]
+ params = (["str", "string", "arrow_string"], [True, False])  # first list: dtype names for Series(..., dtype=dtype); second: values for regex=
+ param_names = ["dtype", "regex"]
+
+ def setup(self, dtype, regex):  # builds self.s; `regex` is accepted but only used in time_contains
+ from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
 
- def setup(self, regex):
- self.s = Series(tm.makeStringIndex(10 ** 5))
+ try:  # only the Series construction is guarded; the import above sits outside this try
+ self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
+ except ImportError:  # NOTE(review): presumably raised when the optional Arrow backend is missing - confirm
+ raise NotImplementedError  # NOTE(review): asv appears to treat NotImplementedError in setup as "skip this parameter" - confirm
 
- def time_contains(self, regex):
+ def time_contains(self, dtype, regex):  # timed: str.contains("A") with the regex flag taken from params
  self.s.str.contains("A", regex=regex)