@@ -20,6 +20,7 @@ except:
2020 except :
2121 from _dummy_thread import allocate_lock as _thread_allocate_lock
2222
23+ import pytz
2324
2425from cython cimport Py_ssize_t
2526from cpython cimport PyFloat_Check
@@ -40,6 +41,27 @@ from util cimport is_string_object
4041from nattype cimport checknull_with_nat, NPY_NAT
4142from nattype import nat_strings
4243
# Module-level lookup table: strptime directive letter -> small integer
# "parse code". array_strptime branches on these integers (for example
# parse_code == 17 handles %Z and parse_code == 19 handles %z).
# NOTE(review): the lookup that turns a matched directive into parse_code is
# not visible in this view -- presumably keyed by the regex group name; confirm.
44+ cdef dict _parse_code_table = {' y' : 0 ,
45+ ' Y' : 1 ,
46+ ' m' : 2 ,
47+ ' B' : 3 ,
48+ ' b' : 4 ,
49+ ' d' : 5 ,
50+ ' H' : 6 ,
51+ ' I' : 7 ,
52+ ' M' : 8 ,
53+ ' S' : 9 ,
54+ ' f' : 10 ,
55+ ' A' : 11 ,
56+ ' a' : 12 ,
57+ ' w' : 13 ,
58+ ' j' : 14 ,
59+ ' U' : 15 ,
60+ ' W' : 16 ,
61+ ' Z' : 17 ,
62+ ' p' : 18 , # AM/PM marker: an additional key that is only used together with %I
63+ ' z' : 19 }
64+
4365
4466def array_strptime (ndarray[object] values , object fmt ,
4567 bint exact = True , errors = ' raise' ):
@@ -58,15 +80,15 @@ def array_strptime(ndarray[object] values, object fmt,
5880 Py_ssize_t i, n = len (values)
5981 pandas_datetimestruct dts
6082 ndarray[int64_t] iresult
61- int year, month, day, minute, hour, second, weekday, julian, tz
62- int week_of_year, week_of_year_start
83+ ndarray[object ] result_timezone
84+ int year, month, day, minute, hour, second, weekday, julian
85+ int week_of_year, week_of_year_start, parse_code, ordinal
6386 int64_t us, ns
64- object val, group_key, ampm, found
87+ object val, group_key, ampm, found, timezone
6588 dict found_key
6689 bint is_raise = errors== ' raise'
6790 bint is_ignore = errors== ' ignore'
6891 bint is_coerce = errors== ' coerce'
69- int ordinal
7092
7193 assert is_raise or is_ignore or is_coerce
7294
@@ -79,6 +101,8 @@ def array_strptime(ndarray[object] values, object fmt,
79101 in fmt):
80102 raise ValueError (" Cannot use '%W ' or '%U ' without "
81103 " day and year" )
104+ elif ' %Z ' in fmt and ' %z ' in fmt:
105+ raise ValueError (" Cannot parse both %Z and %z " )
82106
83107 global _TimeRE_cache, _regex_cache
84108 with _cache_lock:
@@ -108,32 +132,10 @@ def array_strptime(ndarray[object] values, object fmt,
108132
109133 result = np.empty(n, dtype = ' M8[ns]' )
110134 iresult = result.view(' i8' )
135+ result_timezone = np.empty(n, dtype = ' object' )
111136
112137 dts.us = dts.ps = dts.as = 0
113138
114- cdef dict _parse_code_table = {
115- ' y' : 0 ,
116- ' Y' : 1 ,
117- ' m' : 2 ,
118- ' B' : 3 ,
119- ' b' : 4 ,
120- ' d' : 5 ,
121- ' H' : 6 ,
122- ' I' : 7 ,
123- ' M' : 8 ,
124- ' S' : 9 ,
125- ' f' : 10 ,
126- ' A' : 11 ,
127- ' a' : 12 ,
128- ' w' : 13 ,
129- ' j' : 14 ,
130- ' U' : 15 ,
131- ' W' : 16 ,
132- ' Z' : 17 ,
133- ' p' : 18 # just an additional key, works only with I
134- }
135- cdef int parse_code
136-
137139 for i in range (n):
138140 val = values[i]
139141 if is_string_object(val):
@@ -176,7 +178,7 @@ def array_strptime(ndarray[object] values, object fmt,
176178 year = 1900
177179 month = day = 1
178180 hour = minute = second = ns = us = 0
179- tz = - 1
181+ timezone = None
180182 # Default to -1 to signify that values not known; not critical to have,
181183 # though
182184 week_of_year = - 1
@@ -266,21 +268,10 @@ def array_strptime(ndarray[object] values, object fmt,
266268 # W starts week on Monday.
267269 week_of_year_start = 0
268270 elif parse_code == 17 :
269- # Since -1 is default value only need to worry about setting tz
270- # if it can be something other than -1.
271- found_zone = found_dict[' Z' ].lower()
272- for value, tz_values in enumerate (locale_time.timezone):
273- if found_zone in tz_values:
274- # Deal w/ bad locale setup where timezone names are the
275- # same and yet time.daylight is true; too ambiguous to
276- # be able to tell what timezone has daylight savings
277- if (time.tzname[0 ] == time.tzname[1 ] and
278- time.daylight and found_zone not in (
279- " utc" , " gmt" )):
280- break
281- else :
282- tz = value
283- break
271+ timezone = pytz.timezone(found_dict[' Z' ])
272+ elif parse_code == 19 :
273+ timezone = parse_timezone_directive(found_dict[' z' ])
274+
284275 # If we know the wk of the year and what day of that wk, we can figure
285276 # out the Julian day of the year.
286277 if julian == - 1 and week_of_year != - 1 and weekday != - 1 :
@@ -330,7 +321,9 @@ def array_strptime(ndarray[object] values, object fmt,
330321 continue
331322 raise
332323
333- return result
324+ result_timezone[i] = timezone
325+
326+ return result, result_timezone
334327
335328
336329""" _getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
@@ -538,14 +531,13 @@ class TimeRE(dict):
538531 # XXX: Does 'Y' need to worry about having less or more than
539532 # 4 digits?
540533 ' Y' : r " ( ?P<Y> \d\d\d\d ) " ,
534+ ' z' : r " ( ?P<z> [+- ]\d\d :? [0-5 ]\d ( :? [0-5 ]\d ( \. \d {1,6} ) ? ) ? | Z) " ,
541535 ' A' : self .__seqToRE(self .locale_time.f_weekday, ' A' ),
542536 ' a' : self .__seqToRE(self .locale_time.a_weekday, ' a' ),
543537 ' B' : self .__seqToRE(self .locale_time.f_month[1 :], ' B' ),
544538 ' b' : self .__seqToRE(self .locale_time.a_month[1 :], ' b' ),
545539 ' p' : self .__seqToRE(self .locale_time.am_pm, ' p' ),
546- ' Z' : self .__seqToRE([tz for tz_names in self .locale_time.timezone
547- for tz in tz_names],
548- ' Z' ),
540+ ' Z' : self .__seqToRE(pytz.all_timezones, ' Z' ),
549541 ' %' : ' %' })
550542 base.__setitem__ (' W' , base.__getitem__ (' U' ).replace(' U' , ' W' ))
551543 base.__setitem__ (' c' , self .pattern(self .locale_time.LC_date_time))
@@ -632,3 +624,50 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,
632624 else :
633625 days_to_week = week_0_length + (7 * (week_of_year - 1 ))
634626 return 1 + days_to_week + day_of_week
627+
cdef parse_timezone_directive(object z):
    """
    Parse the '%z' directive and return a pytz.FixedOffset

    Parameters
    ----------
    z : string of the UTC offset
        Either the literal 'Z' or a signed offset such as '+HHMM',
        optionally followed by seconds and a fractional part, with
        optional ':' separators between the fields.

    Returns
    -------
    pytz.FixedOffset

    Raises
    ------
    ValueError
        If ':' separates hours from minutes but a following seconds
        field is not separated by ':' as well.

    Notes
    -----
    This is essentially similar to the cpython implementation
    https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479

    The offset is reduced to whole minutes before it is handed to
    pytz.FixedOffset, so any seconds / fractional-seconds component of
    the directive is parsed (and validated) but does not contribute to
    the returned offset.
    """

    cdef:
        int hours, minutes, seconds, pad_number, microseconds
        int total_minutes
        object gmtoff_remainder, gmtoff_remainder_padding

    if z == 'Z':
        return pytz.FixedOffset(0)

    # Normalise '+HH:MM[:SS[.ffffff]]' to '+HHMM[SS[.ffffff]]' so the fixed
    # slices below work for both the colon and the colon-free spelling.
    if z[3] == ':':
        z = z[:3] + z[4:]
        if len(z) > 5:
            if z[5] != ':':
                msg = "Inconsistent use of : in {0}"
                raise ValueError(msg.format(z))
            z = z[:5] + z[6:]

    hours = int(z[1:3])
    minutes = int(z[3:5])
    seconds = int(z[5:7] or 0)

    # Pad to always return microseconds.
    # z[7] is the '.' separator (when present), so the digits start at 8.
    gmtoff_remainder = z[8:]
    pad_number = 6 - len(gmtoff_remainder)
    gmtoff_remainder_padding = "0" * pad_number
    microseconds = int(gmtoff_remainder + gmtoff_remainder_padding)

    # Use explicit floor division: every operand is a C int and the result
    # is stored in a C int, so '/' would only be correct under Python 2
    # division semantics and breaks once true division is in effect.
    total_minutes = ((hours * 60) + minutes + (seconds // 60) +
                     (microseconds // 60000000))
    total_minutes = -total_minutes if z.startswith("-") else total_minutes
    return pytz.FixedOffset(total_minutes)
0 commit comments