-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
ENH: EADtype._find_compatible_dtype #53106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a0f8c31 0770a07 9be179b 1271e26 48a2f6d c62dcaa c85a3eb fb0b03a b809fd7 deab083 b565439 c3fbbcb 995f4b4 6f481f9 0790932 2b9a092 a85a6d4 92227ef File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -8,10 +8,14 @@ | |
| ) | ||
| from decimal import Decimal | ||
| import re | ||
| from typing import TYPE_CHECKING | ||
| from typing import ( | ||
| TYPE_CHECKING, | ||
| Any, | ||
| ) | ||
| | ||
| import numpy as np | ||
| | ||
| from pandas._libs import missing as libmissing | ||
| from pandas._libs.tslibs import ( | ||
| Timedelta, | ||
| Timestamp, | ||
| | @@ -23,6 +27,7 @@ | |
| StorageExtensionDtype, | ||
| register_extension_dtype, | ||
| ) | ||
| from pandas.core.dtypes.cast import maybe_promote | ||
| from pandas.core.dtypes.dtypes import CategoricalDtypeType | ||
| | ||
| if not pa_version_under7p0: | ||
| | @@ -321,3 +326,27 @@ def __from_arrow__(self, array: pa.Array | pa.ChunkedArray): | |
| array_class = self.construct_array_type() | ||
| arr = array.cast(self.pyarrow_dtype, safe=True) | ||
| return array_class(arr) | ||
| | ||
| def _maybe_promote(self, item: Any) -> tuple[DtypeObj, Any]: | ||
| if isinstance(item, pa.Scalar): | ||
| if not item.is_valid: | ||
| ||
| # TODO: ask joris for help making these checks more robust | ||
| if item.type == self.pyarrow_dtype: | ||
| return self, item.as_py() | ||
| if item.type.to_pandas_dtype() == np.int64 and self.kind == "i": | ||
| ||
| # FIXME: kludge | ||
| return self, item.as_py() | ||
| | ||
| item = item.as_py() | ||
| | ||
| elif item is None or item is libmissing.NA: | ||
| # TODO: np.nan? use is_valid_na_for_dtype | ||
| ||
| return self, item | ||
| | ||
| dtype, item = maybe_promote(self.numpy_dtype, item) | ||
| | ||
| if dtype == self.numpy_dtype: | ||
| return self, item | ||
| | ||
| # TODO: implement from_numpy_dtype analogous to MaskedDtype.from_numpy_dtype | ||
| return np.dtype(object), item | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -2091,7 +2091,7 @@ def _setitem_with_indexer_missing(self, indexer, value): | |
| return self._setitem_with_indexer(new_indexer, value, "loc") | ||
| | ||
| # this preserves dtype of the value and of the object | ||
| if not is_scalar(value): | ||
| if is_list_like(value): | ||
| Member: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note for ArrowDtype with — Member Author: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, getting rid of this is_list_like check causes us to incorrectly raise on numpy non-object cases when using a list value (for which we don't have any tests). Can fix that in this PR or separately, as it is a bit more invasive. | ||
| new_dtype = None | ||
| | ||
| elif is_valid_na_for_dtype(value, self.obj.dtype): | ||
| | @@ -2107,8 +2107,7 @@ def _setitem_with_indexer_missing(self, indexer, value): | |
| # We should not cast, if we have object dtype because we can | ||
| # set timedeltas into object series | ||
| curr_dtype = self.obj.dtype | ||
| curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype) | ||
| new_dtype = maybe_promote(curr_dtype, value)[0] | ||
| new_dtype, value = maybe_promote(curr_dtype, value) | ||
| else: | ||
| new_dtype = None | ||
| | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -2855,6 +2855,29 @@ def test_describe_timedelta_data(pa_type): | |
| tm.assert_series_equal(result, expected) | ||
| | ||
| | ||
| @pytest.mark.parametrize( | ||
| "value, target_value, dtype", | ||
| [ | ||
| (pa.scalar(4, type="int32"), 4, "int32[pyarrow]"), | ||
| (pa.scalar(4, type="int64"), 4, "int32[pyarrow]"), | ||
| # (pa.scalar(4.5, type="float64"), 4, "int32[pyarrow]"), | ||
| Member: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What happens here? Also, what happens with an int64 scalar and int32 dtype? — Member Author: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd want to follow the same logic we do for numpy dtypes, but was punting here in expectation of doing it in a follow-up (likely involving Joris expressing an opinion). | ||
| (4, 4, "int32[pyarrow]"), | ||
| (pd.NA, None, "int32[pyarrow]"), | ||
| (None, None, "int32[pyarrow]"), | ||
| (pa.scalar(None, type="int32"), None, "int32[pyarrow]"), | ||
| (pa.scalar(None, type="int64"), None, "int32[pyarrow]"), | ||
| ], | ||
| ) | ||
| def test_series_setitem_with_enlargement(value, target_value, dtype): | ||
| # GH#52235 | ||
| # similar to series/indexing/test_setitem.py::test_setitem_keep_precision | ||
| # and test_setitem_enlarge_with_na, but for arrow dtypes | ||
| ser = pd.Series([1, 2, 3], dtype=dtype) | ||
| ser[3] = value | ||
| expected = pd.Series([1, 2, 3, target_value], dtype=dtype) | ||
| tm.assert_series_equal(ser, expected) | ||
| | ||
| | ||
| @pytest.mark.parametrize("pa_type", tm.DATETIME_PYARROW_DTYPES) | ||
| def test_describe_datetime_data(pa_type): | ||
| # GH53001 | ||
| | ||
Uh oh!
There was an error while loading. Please reload this page.