Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ Datetimelike
- Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
- Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when ``errors`` is set to ``'coerce'`` (:issue:`26122`)
- Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`)
- Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with a mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``errors='coerce'`` (:issue:`25512`)

Timedelta
^^^^^^^^^
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,21 +775,21 @@ def calc_with_mask(carg, mask):
# try intlike / strings that are ints
try:
return calc(arg.astype(np.int64))
except ValueError:
except (ValueError, OverflowError):
pass

# a float with actual np.nan
try:
carg = arg.astype(np.float64)
return calc_with_mask(carg, notna(carg))
except ValueError:
except (ValueError, OverflowError):
pass

# string with NaN-like
try:
mask = ~algorithms.isin(arg, list(tslib.nat_strings))
return calc_with_mask(arg, mask)
except ValueError:
except (ValueError, OverflowError):
pass

return None
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,25 @@ def test_to_datetime_format_YYYYMMDD(self, cache):
result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
cache=cache)
expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("input_s, expected", [
    # NaN before strings with invalid date values
    [Series(['19801222', np.nan, '20010012', '10019999']),
     Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
    # NaN after strings with invalid date values
    [Series(['19801222', '20010012', '10019999', np.nan]),
     Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
    # NaN before integers with invalid date values
    [Series([20190813, np.nan, 20010012, 20019999]),
     Series([Timestamp('20190813'), np.nan, np.nan, np.nan])],
    # NaN after integers with invalid date values
    [Series([20190813, 20010012, np.nan, 20019999]),
     Series([Timestamp('20190813'), np.nan, np.nan, np.nan])]])
def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
    """Check that invalid YYYYMMDD values mixed with NaN are coerced.

    Each case feeds ``to_datetime`` one valid date together with NaN and
    values that are not valid ``%Y%m%d`` dates, as strings or as numbers,
    with the NaN placed before or after the invalid entries.  With
    ``errors='coerce'`` the call must return a result instead of raising.

    Parameters
    ----------
    input_s : Series
        Input values handed to ``pd.to_datetime``.
    expected : Series
        Expected result: the valid date is kept and every other position
        is missing.
    """
    # GH 25512
    # format='%Y%m%d', errors='coerce'
    # NOTE(review): ``expected`` spells missing values as np.nan next to a
    # Timestamp; presumably the Series constructor stores them as NaT in a
    # datetime64[ns] Series -- confirm if the comparison ever fails on dtype.
    result = pd.to_datetime(input_s, format='%Y%m%d', errors='coerce')
    # Use the ``tm`` alias like the neighbouring tests in this file rather
    # than relying on a separately imported bare ``assert_series_equal``.
    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('cache', [True, False])
Expand Down