11# -*- coding: utf-8 -*-
22# cython: profile=False
33import collections
4- import re
54
65import sys
76cdef bint PY3 = (sys.version_info[0 ] >= 3 )
@@ -236,6 +235,14 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
236235 return < int64_t> (base * m) + < int64_t> (frac * m)
237236
238237
cdef inline _decode_if_necessary(object ts):
    """
    Return ``ts`` as text, decoding it when necessary.

    Parameters
    ----------
    ts : object
        Value that is about to be scanned character by character.

    Returns
    -------
    object
        ``ts`` unchanged when running on Python 3 or when it is already a
        unicode object; otherwise ``str(ts)`` decoded as UTF-8.
    """
    # Nothing to do on Python 3, or when the value is already unicode.
    if PY3 or PyUnicode_Check(ts):
        return ts

    # Python 2 byte string (or other object): go through str, then decode.
    return str(ts).decode('utf-8')
244+
245+
239246cdef inline parse_timedelta_string(object ts):
240247 """
241248 Parse a regular format timedelta string. Return an int64_t (in ns)
@@ -258,9 +265,7 @@ cdef inline parse_timedelta_string(object ts):
258265 if len (ts) == 0 or ts in nat_strings:
259266 return NPY_NAT
260267
261- # decode ts if necessary
262- if not PyUnicode_Check(ts) and not PY3:
263- ts = str (ts).decode(' utf-8' )
268+ ts = _decode_if_necessary(ts)
264269
265270 for c in ts:
266271
@@ -507,26 +512,14 @@ def _binary_op_method_timedeltalike(op, name):
507512# ----------------------------------------------------------------------
508513# Timedelta Construction
509514
510- iso_pater = re.compile(r """ P
511- ( ?P<days> -? [0-9 ]* ) DT
512- ( ?P<hours> [0-9 ]{1,2} ) H
513- ( ?P<minutes> [0-9 ]{1,2} ) M
514- ( ?P<seconds> [0-9 ]{0,2} )
515- ( \.
516- ( ?P<milliseconds> [0-9 ]{1,3} )
517- ( ?P<microseconds> [0-9 ]{0,3} )
518- ( ?P<nanoseconds> [0-9 ]{0,3} )
519- ) ? S""" , re.VERBOSE)
520-
521-
cdef inline int64_t parse_iso_format_string(object ts) except? -1:
    """
    Parse an ISO 8601 Duration formatted string with a single pass over its
    characters, accumulating the value of each component.

    Digits are collected until the designator that follows them is known;
    the collected digits are then converted with ``timedelta_from_spec`` and
    added to the running total. Fractional seconds are consumed in groups of
    three digits (milliseconds, then microseconds, then nanoseconds).

    Parameters
    ----------
    ts :
        ISO 8601 Duration formatted string

    Returns
    -------
    int64_t
        Sum of the values returned by ``timedelta_from_spec`` for every
        component (presumably nanoseconds, as for
        ``parse_timedelta_string`` -- confirm against
        ``timedelta_from_spec``).

    Raises
    ------
    ValueError
        If ``ts`` cannot be parsed
    """

    cdef:
        unicode c
        int64_t result = 0, r
        # number of digits seen in the current fractional-second group
        int p = 0
        # unit of the fractional-second group currently being collected
        object dec_unit = 'ms', err_msg
        bint have_dot = 0, have_value = 0, neg = 0
        # pending digits and the designators seen since the last flush
        list number = [], unit = []

    ts = _decode_if_necessary(ts)

    err_msg = "Invalid ISO 8601 Duration format - {}".format(ts)

    for c in ts:
        # number (ascii codes for '0'..'9')
        if 48 <= ord(c) <= 57:

            have_value = 1
            if have_dot:
                # A fractional group is full after three digits: queue its
                # unit so the group is flushed below, then move on to the
                # next finer unit (ms -> us -> ns). The ns group never
                # rolls over; extra digits are rejected when 'S' is reached.
                if p == 3 and dec_unit != 'ns':
                    unit.append(dec_unit)
                    if dec_unit == 'ms':
                        dec_unit = 'us'
                    elif dec_unit == 'us':
                        dec_unit = 'ns'
                    p = 0
                p += 1

            if not unit:
                # still inside the same run of digits
                number.append(c)
            else:
                # A designator was seen since the last digit, so the pending
                # digits form a complete component: flush it.
                # if in days, pop trailing T
                if unit[-1] == 'T':
                    unit.pop()
                elif 'H' in unit or 'M' in unit:
                    # hours and minutes take at most two digits
                    if len(number) > 2:
                        raise ValueError(err_msg)
                r = timedelta_from_spec(number, '0', unit)
                result += timedelta_as_neg(r, neg)

                # the sign is cleared once a component has been flushed
                # here; the current digit starts the next component
                neg = 0
                unit, number = [], [c]
        else:
            if c == 'P':
                pass  # ignore leading character
            elif c == '-':
                # only a single sign, and only before any digit
                if neg or have_value:
                    raise ValueError(err_msg)
                else:
                    neg = 1
            elif c in ('D', 'T', 'H', 'M'):
                unit.append(c)
            elif c == '.':
                # append any seconds
                if number:
                    r = timedelta_from_spec(number, '0', 'S')
                    result += timedelta_as_neg(r, neg)
                    unit, number = [], []
                have_dot = 1
            elif c == 'S':
                if have_dot:  # ms, us, or ns
                    if not number or p > 3:
                        raise ValueError(err_msg)
                    # pad to 3 digits as required
                    number.extend(['0'] * (3 - p))

                    r = timedelta_from_spec(number, '0', dec_unit)
                    result += timedelta_as_neg(r, neg)
                else:  # seconds
                    if len(number) <= 2:
                        r = timedelta_from_spec(number, '0', 'S')
                        result += timedelta_as_neg(r, neg)
                    else:
                        raise ValueError(err_msg)
            else:
                raise ValueError(err_msg)

    if not have_value:
        # Received string only - never parsed any values
        raise ValueError(err_msg)

    return result
559620
560621
561622cdef _to_py_int_float(v):
0 commit comments