-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
ENH: Parse %z and %Z directive in format for to_datetime #19979
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 31 commits
4a43815 cb47c08 f299aec 259ec8f 77af4db 54c2491 0e2a0cd d31e141 7bdbdf4 3e3d5c6 c16ef8c 6f0b7f0 0525823 4c22808 24e1c0a 4f2f865 145e5da 64bc3fc 47a9d69 1b44554 0dcc59f 149781b d99ef5a 0e5e3c6 9a2ea19 924859e a1599a0 6c80c2e abccc3e 473a0f4 ab0a692 56fc683 85bd45e eb2a661 5500ca8 0e0d0fd 34f638c 757458d File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,8 @@ | ||
| from datetime import datetime, timedelta, time | ||
| import numpy as np | ||
| from collections import MutableMapping | ||
| | ||
| import numpy as np | ||
| | ||
| from pandas._libs import tslib | ||
| from pandas._libs.tslibs.strptime import array_strptime | ||
| from pandas._libs.tslibs import parsing, conversion | ||
| | @@ -27,6 +28,7 @@ | |
| ABCDataFrame) | ||
| from pandas.core.dtypes.missing import notna | ||
| from pandas.core import algorithms | ||
| from pandas.compat import zip | ||
| | ||
| | ||
| def _guess_datetime_format_for_array(arr, **kwargs): | ||
| | @@ -103,6 +105,41 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): | |
| return result.values | ||
| | ||
| | ||
| def _return_parsed_timezone_results(result, timezones, box, tz): | ||
| """ | ||
| Return results from array_strptime if a %z or %Z directive was passed. | ||
| | ||
| Parameters | ||
| ---------- | ||
| result : ndarray | ||
| int64 date representations of the dates | ||
| timezones : ndarray | ||
| pytz timezone objects | ||
| box : boolean | ||
| True boxes result as an Index-like, False returns an ndarray | ||
| tz : object | ||
| None or pytz timezone object | ||
| Returns | ||
| ------- | ||
| tz_result : ndarray of parsed dates with timezone | ||
| Returns: | ||
| | ||
| - Index-like if box=True | ||
| - ndarray of Timestamps if box=False | ||
| | ||
| """ | ||
| if tz is not None: | ||
| raise ValueError("Cannot pass a tz argument when " | ||
| "parsing strings with timezone " | ||
| "information.") | ||
| tz_results = np.array([tslib.Timestamp(res).tz_localize(tz) for res, tz | ||
| in zip(result, timezones)]) | ||
| if box: | ||
| from pandas import Index | ||
| return Index(tz_results) | ||
| return tz_results | ||
| | ||
| | ||
| def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, | ||
| utc=None, box=True, format=None, exact=True, | ||
| unit=None, infer_datetime_format=False, origin='unix', | ||
| | @@ -343,8 +380,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): | |
| # fallback | ||
| if result is None: | ||
| try: | ||
| result = array_strptime(arg, format, exact=exact, | ||
| errors=errors) | ||
| result, timezones = array_strptime( | ||
Contributor There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would much rather do the error handling in `_return_parsed_timezone_results`. This block is just very complicated and hard to grok. | ||
| arg, format, exact=exact, errors=errors) | ||
| if '%Z' in format or '%z' in format: | ||
| return _return_parsed_timezone_results( | ||
| result, timezones, box, tz) | ||
| except tslib.OutOfBoundsDatetime: | ||
| if errors == 'raise': | ||
| raise | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -179,6 +179,55 @@ def test_to_datetime_format_weeks(self, cache): | |
| for s, format, dt in data: | ||
| assert to_datetime(s, format=format, cache=cache) == dt | ||
| | ||
| @pytest.mark.parametrize("box,const,assert_equal", [ | ||
| [True, pd.Index, 'assert_index_equal'], | ||
| [False, np.array, 'assert_numpy_array_equal']]) | ||
| @pytest.mark.parametrize("fmt,dates,expected_dates", [ | ||
| ['%Y-%m-%d %H:%M:%S %Z', | ||
| ['2010-01-01 12:00:00 UTC'] * 2, | ||
| [pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2], | ||
| ['%Y-%m-%d %H:%M:%S %Z', | ||
| ['2010-01-01 12:00:00 UTC', | ||
| '2010-01-01 12:00:00 GMT', | ||
| '2010-01-01 12:00:00 US/Pacific'], | ||
| [pd.Timestamp('2010-01-01 12:00:00', tz='UTC'), | ||
| pd.Timestamp('2010-01-01 12:00:00', tz='GMT'), | ||
| pd.Timestamp('2010-01-01 12:00:00', tz='US/Pacific')]], | ||
| ['%Y-%m-%d %H:%M:%S %z', | ||
Member There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you do one of them, e.g. this one, without the space before the tz? | ||
| ['2010-01-01 12:00:00 +0100'] * 2, | ||
| [pd.Timestamp('2010-01-01 12:00:00', | ||
| tzinfo=pytz.FixedOffset(60))] * 2], | ||
| ['%Y-%m-%d %H:%M:%S %z', | ||
| ['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100'], | ||
| [pd.Timestamp('2010-01-01 12:00:00', | ||
| tzinfo=pytz.FixedOffset(60)), | ||
| pd.Timestamp('2010-01-01 12:00:00', | ||
| tzinfo=pytz.FixedOffset(-60))]], | ||
| ['%Y-%m-%d %H:%M:%S %z', | ||
| ['2010-01-01 12:00:00 Z', '2010-01-01 12:00:00 Z'], | ||
| Member There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this also work with Member Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The regex I pulled from https://github.com/python/cpython/blob/master/Lib/_strptime.py has an option for 'Z' with But Member There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK (that's probably a newer addition to python), then it makes sense to follow upstream python to be consistent | ||
| [pd.Timestamp('2010-01-01 12:00:00', | ||
| tzinfo=pytz.FixedOffset(0)), | ||
| ||
| pd.Timestamp('2010-01-01 12:00:00', | ||
| tzinfo=pytz.FixedOffset(0))]]]) | ||
| def test_to_datetime_parse_tzname_or_tzoffset(self, box, const, | ||
| assert_equal, fmt, | ||
| dates, expected_dates): | ||
| # GH 13486 | ||
| result = pd.to_datetime(dates, format=fmt, box=box) | ||
| expected = const(expected_dates) | ||
| getattr(tm, assert_equal)(result, expected) | ||
| | ||
| with pytest.raises(ValueError): | ||
| pd.to_datetime(dates, format=fmt, box=box, utc=True) | ||
| | ||
| @pytest.mark.parametrize('offset', [ | ||
| '+0', '-1foo', 'UTCbar', ':10', '+01:000:01']) | ||
| ||
| def test_to_datetime_parse_timezone_malformed(self, offset): | ||
| fmt = '%Y-%m-%d %H:%M:%S %z' | ||
| date = '2010-01-01 12:00:00 ' + offset | ||
| with pytest.raises(ValueError): | ||
| pd.to_datetime([date], format=fmt) | ||
| | ||
| | ||
| class TestToDatetime(object): | ||
| def test_to_datetime_pydatetime(self): | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you move this whole parse to a function and just call it here (and return the values as a tuple)?