Skip to content
6 changes: 3 additions & 3 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,21 +775,21 @@ def calc_with_mask(carg, mask):
# try intlike / strings that are ints
try:
return calc(arg.astype(np.int64))
except ValueError:
except (ValueError, OverflowError):
pass

# a float with actual np.nan
try:
carg = arg.astype(np.float64)
return calc_with_mask(carg, notna(carg))
except ValueError:
except (ValueError, OverflowError):
pass

# string with NaN-like
try:
mask = ~algorithms.isin(arg, list(tslib.nat_strings))
return calc_with_mask(arg, mask)
except ValueError:
except (ValueError, OverflowError):
pass

return None
Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,35 @@ def test_to_datetime_format_YYYYMMDD(self, cache):
result = to_datetime(s, format='%Y%m%d', cache=cache)
assert_series_equal(result, expected)

# GH 25512
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make this a new, separate test (with `_overflow` in the name)?

# NaN before strings with invalid date values, errors=coerce
s = Series(['19801222', np.nan, '20010012', '10019999'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you parameterize over these Series inputs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this what you meant?
Thanks!

result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
cache=cache)
expected = Series([Timestamp('19801222'), np.nan, np.nan, np.nan])
tm.assert_series_equal(result, expected)

# NaN after strings with invalid date values, errors=coerce
s = Series(['19801222', '20010012', '10019999', np.nan])
result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
cache=cache)
expected = Series([Timestamp('19801222'), np.nan, np.nan, np.nan])
tm.assert_series_equal(result, expected)

# NaN before integers with invalid date values, errors=coerce
s = Series([20190813, np.nan, 20010012, 20019999])
result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
cache=cache)
expected = Series([Timestamp('20190813'), np.nan, np.nan, np.nan])
tm.assert_series_equal(result, expected)

# NaN after integers with invalid date values, errors=coerce
s = Series([20190813, 20010012, np.nan, 20019999])
result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
cache=cache)
expected = Series([Timestamp('20190813'), np.nan, np.nan, np.nan])
tm.assert_series_equal(result, expected)

# coercion
# GH 7930
s = Series([20121231, 20141231, 99991231])
Expand Down