Skip to content

Commit ab49678

Browse files
authored
Merge branch 'main' into cow_index_ref_tracking
2 parents 538a3f8 + 880f63b commit ab49678

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+546
-977
lines changed

.circleci/setup_env.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ if pip list | grep -q ^pandas; then
5555
fi
5656

5757
echo "Build extensions"
58-
python setup.py build_ext -q -j4
58+
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
59+
python setup.py build_ext -q -j1
5960

6061
echo "Install pandas"
6162
python -m pip install --no-build-isolation --no-use-pep517 -e .

.github/workflows/32-bit-linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
4141
python -m pip install versioneer[toml] && \
4242
python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
43-
python setup.py build_ext -q -j$(nproc) && \
43+
python setup.py build_ext -q -j1 && \
4444
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
4545
python -m pip list && \
4646
export PANDAS_CI=1 && \

.github/workflows/python-dev.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,10 @@ jobs:
8282
python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
8383
python -m pip list
8484
85+
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
8586
- name: Build Pandas
8687
run: |
87-
python setup.py build_ext -q -j4
88+
python setup.py build_ext -q -j1
8889
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
8990
9091
- name: Build Version

MANIFEST.in

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,3 @@ prune pandas/tests/io/parser/data
5858
# Selectively re-add *.cxx files that were excluded above
5959
graft pandas/_libs/src
6060
graft pandas/_libs/tslibs/src
61-
include pandas/_libs/pd_parser.h
62-
include pandas/_libs/pd_parser.c

doc/source/whatsnew/v2.1.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ Performance improvements
113113
- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`)
114114
- Performance improvement in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`)
115115
- Performance improvement in :meth:`DataFrame.where` when ``cond`` is backed by an extension dtype (:issue:`51574`)
116-
- Performance improvement in :meth:`read_orc` when reading a remote URI file path. (:issue:`51609`)
116+
- Performance improvement in :func:`read_orc` when reading a remote URI file path. (:issue:`51609`)
117+
- Performance improvement in :func:`read_parquet` and :meth:`DataFrame.to_parquet` when reading a remote file with ``engine="pyarrow"`` (:issue:`51609`)
117118
- Performance improvement in :meth:`MultiIndex.sortlevel` when ``ascending`` is a list (:issue:`51612`)
118119
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`)
119120
- Performance improvement when parsing strings to ``boolean[pyarrow]`` dtype (:issue:`51730`)

pandas/_libs/__init__.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@
1010
]
1111

1212

13-
# The imports below need to happen first to ensure pandas top level
14-
# module gets monkeypatched with the pandas_datetime_CAPI
15-
# see pandas_datetime_exec in pd_datetime.c
16-
import pandas._libs.pandas_parser # noqa # isort: skip # type: ignore[reportUnusedImport]
17-
import pandas._libs.pandas_datetime # noqa # isort: skip # type: ignore[reportUnusedImport]
1813
from pandas._libs.interval import Interval
1914
from pandas._libs.tslibs import (
2015
NaT,

pandas/_libs/index.pyx

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,7 @@ from pandas._libs.tslibs.nattype cimport c_NaT as NaT
2020
from pandas._libs.tslibs.np_datetime cimport (
2121
NPY_DATETIMEUNIT,
2222
get_unit_from_dtype,
23-
import_pandas_datetime,
2423
)
25-
26-
import_pandas_datetime()
27-
28-
2924
from pandas._libs.tslibs.period cimport is_period_object
3025
from pandas._libs.tslibs.timedeltas cimport _Timedelta
3126
from pandas._libs.tslibs.timestamps cimport _Timestamp

pandas/_libs/lib.pyx

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,9 @@ cdef extern from "numpy/arrayobject.h":
8888
cdef extern from "numpy/ndarrayobject.h":
8989
bint PyArray_CheckScalar(obj) nogil
9090

91-
cdef extern from "pd_parser.h":
92-
int floatify(object, float64_t *result, int *maybe_int) except -1
93-
void PandasParser_IMPORT()
9491

95-
PandasParser_IMPORT
92+
cdef extern from "src/parse_helper.h":
93+
int floatify(object, float64_t *result, int *maybe_int) except -1
9694

9795
from pandas._libs cimport util
9896
from pandas._libs.util cimport (

pandas/_libs/missing.pyx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,8 @@ from pandas._libs.tslibs.np_datetime cimport (
3434
get_datetime64_unit,
3535
get_datetime64_value,
3636
get_timedelta64_value,
37-
import_pandas_datetime,
3837
)
3938

40-
import_pandas_datetime()
41-
4239
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
4340

4441
cdef:

pandas/_libs/parsers.pyx

Lines changed: 20 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,9 @@ cdef extern from "parser/tokenizer.h":
229229
int64_t skip_first_N_rows
230230
int64_t skipfooter
231231
# pick one, depending on whether the converter requires GIL
232-
double (*double_converter)(const char *, char **,
233-
char, char, char,
234-
int, int *, int *) nogil
232+
float64_t (*double_converter)(const char *, char **,
233+
char, char, char,
234+
int, int *, int *) nogil
235235

236236
# error handling
237237
char *warn_msg
@@ -249,16 +249,6 @@ cdef extern from "parser/tokenizer.h":
249249
int seen_uint
250250
int seen_null
251251

252-
void COLITER_NEXT(coliter_t, const char *) nogil
253-
254-
cdef extern from "pd_parser.h":
255-
void *new_rd_source(object obj) except NULL
256-
257-
int del_rd_source(void *src)
258-
259-
void* buffer_rd_bytes(void *source, size_t nbytes,
260-
size_t *bytes_read, int *status, const char *encoding_errors)
261-
262252
void uint_state_init(uint_state *self)
263253
int uint64_conflict(uint_state *self)
264254

@@ -289,49 +279,26 @@ cdef extern from "pd_parser.h":
289279
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
290280
uint64_t uint_max, int *error, char tsep) nogil
291281

292-
double xstrtod(const char *p, char **q, char decimal,
293-
char sci, char tsep, int skip_trailing,
294-
int *error, int *maybe_int) nogil
295-
double precise_xstrtod(const char *p, char **q, char decimal,
296-
char sci, char tsep, int skip_trailing,
297-
int *error, int *maybe_int) nogil
298-
double round_trip(const char *p, char **q, char decimal,
282+
float64_t xstrtod(const char *p, char **q, char decimal,
299283
char sci, char tsep, int skip_trailing,
300284
int *error, int *maybe_int) nogil
285+
float64_t precise_xstrtod(const char *p, char **q, char decimal,
286+
char sci, char tsep, int skip_trailing,
287+
int *error, int *maybe_int) nogil
288+
float64_t round_trip(const char *p, char **q, char decimal,
289+
char sci, char tsep, int skip_trailing,
290+
int *error, int *maybe_int) nogil
301291

302292
int to_boolean(const char *item, uint8_t *val) nogil
303293

304-
void PandasParser_IMPORT()
305-
306-
PandasParser_IMPORT
307-
308-
# When not invoked directly but rather assigned as a function,
309-
# cdef extern'ed declarations seem to leave behind an undefined symbol
310-
cdef double xstrtod_wrapper(const char *p, char **q, char decimal,
311-
char sci, char tsep, int skip_trailing,
312-
int *error, int *maybe_int) nogil:
313-
return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
314-
315-
316-
cdef double precise_xstrtod_wrapper(const char *p, char **q, char decimal,
317-
char sci, char tsep, int skip_trailing,
318-
int *error, int *maybe_int) nogil:
319-
return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
320-
321-
322-
cdef double round_trip_wrapper(const char *p, char **q, char decimal,
323-
char sci, char tsep, int skip_trailing,
324-
int *error, int *maybe_int) nogil:
325-
return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
326294

295+
cdef extern from "parser/io.h":
296+
void *new_rd_source(object obj) except NULL
327297

328-
cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
329-
size_t *bytes_read, int *status,
330-
const char *encoding_errors) noexcept:
331-
return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors)
298+
int del_rd_source(void *src)
332299

333-
cdef int del_rd_source_wrapper(void *src) noexcept:
334-
return del_rd_source(src)
300+
void* buffer_rd_bytes(void *source, size_t nbytes,
301+
size_t *bytes_read, int *status, const char *encoding_errors)
335302

336303

337304
cdef class TextReader:
@@ -518,11 +485,11 @@ cdef class TextReader:
518485

519486
if float_precision == "round_trip":
520487
# see gh-15140
521-
self.parser.double_converter = round_trip_wrapper
488+
self.parser.double_converter = round_trip
522489
elif float_precision == "legacy":
523-
self.parser.double_converter = xstrtod_wrapper
490+
self.parser.double_converter = xstrtod
524491
elif float_precision == "high" or float_precision is None:
525-
self.parser.double_converter = precise_xstrtod_wrapper
492+
self.parser.double_converter = precise_xstrtod
526493
else:
527494
raise ValueError(f"Unrecognized float_precision option: "
528495
f"{float_precision}")
@@ -640,8 +607,8 @@ cdef class TextReader:
640607

641608
ptr = new_rd_source(source)
642609
self.parser.source = ptr
643-
self.parser.cb_io = buffer_rd_bytes_wrapper
644-
self.parser.cb_cleanup = del_rd_source_wrapper
610+
self.parser.cb_io = &buffer_rd_bytes
611+
self.parser.cb_cleanup = &del_rd_source
645612

646613
cdef _get_header(self, list prelim_header):
647614
# header is now a list of lists, so field_count should use header[0]

0 commit comments

Comments
 (0)