Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -508,8 +508,8 @@ cpdef array_to_datetime(
continue
elif is_raise:
raise ValueError(
f"time data \"{val}\" at position {i} doesn't "
f"match format \"{format}\""
f"time data \"{val}\" doesn't "
f"match format \"{format}\", at position {i}"
)
return values, tz_out
# these must be ns unit by-definition
Expand Down Expand Up @@ -557,8 +557,8 @@ cpdef array_to_datetime(
continue
elif is_raise:
raise ValueError(
f"time data \"{val}\" at position {i} doesn't "
f"match format \"{format}\""
f"time data \"{val}\" doesn't "
f"match format \"{format}\", at position {i}"
)
return values, tz_out

Expand All @@ -575,8 +575,8 @@ cpdef array_to_datetime(
iresult[i] = NPY_NAT
continue
raise TypeError(
f"invalid string coercion to datetime for \"{val}\" "
f"at position {i}"
f"invalid string coercion to datetime "
f"for \"{val}\", at position {i}"
)

if tz is not None:
Expand Down Expand Up @@ -619,7 +619,7 @@ cpdef array_to_datetime(
raise TypeError(f"{type(val)} is not convertible to datetime")

except OutOfBoundsDatetime as ex:
ex.args = (str(ex) + f" present at position {i}", )
ex.args = (f"{ex}, at position {i}",)
if is_coerce:
iresult[i] = NPY_NAT
continue
Expand Down Expand Up @@ -779,7 +779,7 @@ cdef _array_to_datetime_object(
pydatetime_to_dt64(oresult[i], &dts)
check_dts_bounds(&dts)
except (ValueError, OverflowError) as ex:
ex.args = (f"{ex} present at position {i}", )
ex.args = (f"{ex}, at position {i}", )
if is_coerce:
oresult[i] = <object>NaT
continue
Expand Down
9 changes: 4 additions & 5 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,11 @@ def array_strptime(
if exact:
found = format_regex.match(val)
if not found:
raise ValueError(f"time data \"{val}\" at position {i} doesn't "
raise ValueError(f"time data \"{val}\" doesn't "
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than trying to add `at position {i}` in each place where an error can occur, it's more reliable to just append `, at position {i}` to the message when an error is caught (see below).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't done the same in tslib.pyx because, when format is provided, the code here is going to be called anyway once #50242 is in.
So for now I've just made sure the error messages match for the two paths.

f"match format \"{fmt}\"")
if len(val) != found.end():
raise ValueError(
f"unconverted data remains at position {i}: "
f"unconverted data remains: "
f'"{val[found.end():]}"'
)

Expand All @@ -249,7 +249,7 @@ def array_strptime(
found = format_regex.search(val)
if not found:
raise ValueError(
f"time data \"{val}\" at position {i} doesn't match "
f"time data \"{val}\" doesn't match "
f"format \"{fmt}\""
)

Expand Down Expand Up @@ -402,8 +402,7 @@ def array_strptime(
result_timezone[i] = tz

except (ValueError, OutOfBoundsDatetime) as ex:
if isinstance(ex, OutOfBoundsDatetime):
ex.args = (f"{str(ex)} present at position {i}",)
ex.args = (f"{str(ex)}, at position {i}",)
if is_coerce:
iresult[i] = NPY_NAT
continue
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1721,7 +1721,7 @@ def test_parse_multiple_delimited_dates_with_swap_warnings():
with pytest.raises(
ValueError,
match=(
r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y"$'
r'^time data "31/05/2000" doesn\'t match format "%m/%d/%Y", at position 1$'
),
):
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
Expand Down
113 changes: 75 additions & 38 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,8 +481,8 @@ def test_to_datetime_parse_timezone_malformed(self, offset):

msg = "|".join(
[
r'^time data ".*" at position 0 doesn\'t match format ".*"$',
r'^unconverted data remains at position 0: ".*"$',
r'^time data ".*" doesn\'t match format ".*", at position 0$',
r'^unconverted data remains: ".*", at position 0$',
]
)
with pytest.raises(ValueError, match=msg):
Expand Down Expand Up @@ -859,7 +859,7 @@ def test_to_datetime_dt64s_and_str(self, arg, format):
"dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")]
)
def test_to_datetime_dt64s_out_of_bounds(self, cache, dt):
msg = "Out of bounds .* present at position 0"
msg = "^Out of bounds nanosecond timestamp: .*, at position 0$"
with pytest.raises(OutOfBoundsDatetime, match=msg):
to_datetime(dt, errors="raise")

Expand Down Expand Up @@ -1098,7 +1098,7 @@ def test_datetime_bool_arrays_mixed(self, cache):
to_datetime([False, datetime.today()], cache=cache)
with pytest.raises(
ValueError,
match=r'^time data "True" at position 1 doesn\'t match format "%Y%m%d"$',
match=r'^time data "True" doesn\'t match format "%Y%m%d", at position 1$',
):
to_datetime(["20130101", True], cache=cache)
tm.assert_index_equal(
Expand Down Expand Up @@ -1139,10 +1139,10 @@ def test_datetime_invalid_scalar(self, value, format, warning):

msg = "|".join(
[
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
r'^Given date string "a" not likely a datetime present at position 0$',
r'^unconverted data remains at position 0: "9"$',
r"^second must be in 0..59: 00:01:99 present at position 0$",
r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0$',
r'^Given date string "a" not likely a datetime, at position 0$',
r'^unconverted data remains: "9", at position 0$',
r"^second must be in 0..59: 00:01:99, at position 0$",
]
)
with pytest.raises(ValueError, match=msg):
Expand All @@ -1164,11 +1164,11 @@ def test_datetime_outofbounds_scalar(self, value, format, warning):
assert res is NaT

if format is not None:
msg = r'^time data ".*" at position 0 doesn\'t match format ".*"$'
msg = r'^time data ".*" doesn\'t match format ".*", at position 0$'
with pytest.raises(ValueError, match=msg):
to_datetime(value, errors="raise", format=format)
else:
msg = "Out of bounds .* present at position 0"
msg = "^Out of bounds .*, at position 0$"
with pytest.raises(
OutOfBoundsDatetime, match=msg
), tm.assert_produces_warning(warning, match="Could not infer format"):
Expand All @@ -1190,10 +1190,10 @@ def test_datetime_invalid_index(self, values, format, warning):

msg = "|".join(
[
r'^Given date string "a" not likely a datetime present at position 0$',
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
r'^unconverted data remains at position 0: "9"$',
r"^second must be in 0..59: 00:01:99 present at position 0$",
r'^Given date string "a" not likely a datetime, at position 0$',
r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0$',
r'^unconverted data remains: "9", at position 0$',
r"^second must be in 0..59: 00:01:99, at position 0$",
]
)
with pytest.raises(ValueError, match=msg):
Expand Down Expand Up @@ -1373,7 +1373,7 @@ def test_to_datetime_malformed_raise(self):
ts_strings = ["200622-12-31", "111111-24-11"]
with pytest.raises(
ValueError,
match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$",
match=r"^hour must be in 0\.\.23: 111111-24-11, at position 1$",
):
with tm.assert_produces_warning(
UserWarning, match="Could not infer format"
Expand Down Expand Up @@ -1814,8 +1814,8 @@ def test_dataframe_coerce(self, cache):
df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})

msg = (
r'^cannot assemble the datetimes: time data ".+" at position 1 doesn\'t '
r'match format "%Y%m%d"$'
r'^cannot assemble the datetimes: time data ".+" doesn\'t '
r'match format "%Y%m%d", at position 1$'
)
with pytest.raises(ValueError, match=msg):
to_datetime(df2, cache=cache)
Expand Down Expand Up @@ -1892,8 +1892,8 @@ def test_dataframe_float(self, cache):
# float
df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
msg = (
r"^cannot assemble the datetimes: unconverted data remains at position "
r'0: "1"$'
r"^cannot assemble the datetimes: unconverted data remains: "
r'"1", at position 0$'
)
with pytest.raises(ValueError, match=msg):
to_datetime(df, cache=cache)
Expand All @@ -1915,7 +1915,7 @@ def test_to_datetime_barely_out_of_bounds(self):
# in an in-bounds datetime
arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)

msg = "Out of bounds .* present at position 0"
msg = "^Out of bounds nanosecond timestamp: .*, at position 0"
with pytest.raises(OutOfBoundsDatetime, match=msg):
with tm.assert_produces_warning(
UserWarning, match="Could not infer format"
Expand Down Expand Up @@ -1954,8 +1954,8 @@ def test_to_datetime_iso8601_fails(self, input, format, exact):
with pytest.raises(
ValueError,
match=(
rf"time data \"{input}\" at position 0 doesn't match format "
rf"\"{format}\""
rf"time data \"{input}\" doesn't match format "
rf"\"{format}\", at position 0"
),
):
to_datetime(input, format=format, exact=exact)
Expand All @@ -1976,8 +1976,8 @@ def test_to_datetime_iso8601_exact_fails(self, input, format):
with pytest.raises(
ValueError,
match=(
rf"time data \"{input}\" at position 0 doesn't match format "
rf"\"{format}\""
rf"time data \"{input}\" doesn't match format "
rf"\"{format}\", at position 0"
),
):
to_datetime(input, format=format)
Expand Down Expand Up @@ -2015,8 +2015,8 @@ def test_to_datetime_iso8601_separator(self, input, format):
with pytest.raises(
ValueError,
match=(
rf"time data \"{input}\" at position 0 doesn\'t match format "
rf"\"{format}\""
rf"time data \"{input}\" doesn\'t match format "
rf"\"{format}\", at position 0"
),
):
to_datetime(input, format=format)
Expand Down Expand Up @@ -2084,7 +2084,7 @@ def test_to_datetime_on_datetime64_series(self, cache):
def test_to_datetime_with_space_in_series(self, cache):
# GH 6428
ser = Series(["10/18/2006", "10/18/2008", " "])
msg = r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y"$'
msg = r'^time data " " doesn\'t match format "%m/%d/%Y", at position 2$'
with pytest.raises(ValueError, match=msg):
to_datetime(ser, errors="raise", cache=cache)
result_coerce = to_datetime(ser, errors="coerce", cache=cache)
Expand Down Expand Up @@ -2355,8 +2355,8 @@ def test_dayfirst_warnings_invalid_input(self):
with pytest.raises(
ValueError,
match=(
r'^time data "03/30/2011" at position 1 doesn\'t match format '
r'"%d/%m/%Y"$'
r'^time data "03/30/2011" doesn\'t match format '
r'"%d/%m/%Y", at position 1$'
),
):
to_datetime(arr, dayfirst=True)
Expand Down Expand Up @@ -2426,8 +2426,8 @@ def test_to_datetime_inconsistent_format(self, cache):
data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"]
ser = Series(np.array(data))
msg = (
r'^time data "01-02-2011 00:00:00" at position 1 doesn\'t match format '
r'"%m/%d/%Y %H:%M:%S"$'
r'^time data "01-02-2011 00:00:00" doesn\'t match format '
r'"%m/%d/%Y %H:%M:%S", at position 1$'
)
with pytest.raises(ValueError, match=msg):
to_datetime(ser, cache=cache)
Expand Down Expand Up @@ -2550,11 +2550,49 @@ def test_day_not_in_month_raise(self, cache):
):
to_datetime("2015-02-29", errors="raise", cache=cache)

@pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"])
def test_day_not_in_month_raise_value(self, cache, arg):
msg = f'time data "{arg}" at position 0 doesn\'t match format "%Y-%m-%d"'
@pytest.mark.parametrize(
"arg, format, msg",
[
(
"2015-02-29",
"%Y-%m-%d",
'^time data "2015-02-29" doesn\'t match format "%Y-%m-%d", '
"at position 0$",
),
(
"2015-29-02",
"%Y-%d-%m",
"^day is out of range for month, at position 0$",
),
(
"2015-02-32",
"%Y-%m-%d",
'^time data "2015-02-32" doesn\'t match format "%Y-%m-%d", '
"at position 0$",
),
(
"2015-32-02",
"%Y-%d-%m",
'^time data "2015-32-02" doesn\'t match format "%Y-%d-%m", '
"at position 0$",
),
(
"2015-04-31",
"%Y-%m-%d",
'^time data "2015-04-31" doesn\'t match format "%Y-%m-%d", '
"at position 0$",
),
(
"2015-31-04",
"%Y-%d-%m",
"^day is out of range for month, at position 0$",
),
Comment on lines +2579 to +2589
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note how the ISO and non-ISO messages don't quite match yet - this PR just ensures that they both show the position of the error.

#50242 will ensure that the messages match

],
)
def test_day_not_in_month_raise_value(self, cache, arg, format, msg):
# https://github.com/pandas-dev/pandas/issues/50462
with pytest.raises(ValueError, match=msg):
to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache)
to_datetime(arg, errors="raise", format=format, cache=cache)

@pytest.mark.parametrize(
"expected, format, warning",
Expand Down Expand Up @@ -2934,7 +2972,7 @@ def test_invalid_origins_tzinfo(self):
def test_incorrect_value_exception(self):
# GH47495
with pytest.raises(
ValueError, match="Unknown string format: yesterday present at position 1"
ValueError, match="Unknown string format: yesterday, at position 1"
):
with tm.assert_produces_warning(
UserWarning, match="Could not infer format"
Expand All @@ -2952,8 +2990,7 @@ def test_incorrect_value_exception(self):
def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
# see gh-23830
msg = (
r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00 "
r"present at position 0$"
r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00, at position 0$"
)
with pytest.raises(OutOfBoundsDatetime, match=msg):
with tm.assert_produces_warning(warning, match="Could not infer format"):
Expand Down