DOC/ENH: timedelta conversions & docs

TST: add tests/catch for non-absolute DateOffsets in timedelta operations
pandas-dev · jreback · Aug 13, 2013 · Aug 10, 2013 · Aug 10, 2013 · Aug 12, 2013
commit b7e80a53bba25038d749ba2a487e916023cd78d1
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -53,8 +53,11 @@ pandas 0.13
  - Add ``rename`` and ``set_names`` methods to ``Index`` as well as
  ``set_names``, ``set_levels``, ``set_labels`` to ``MultiIndex``.
  (:issue:`4039`)
- - A Series of dtype ``Timedelta64[ns]`` can now be divided/multiplied
+ - A Series of dtype ``timedelta64[ns]`` can now be divided/multiplied
  by an integer series (:issue:`4521`)
+ - A Series of dtype ``timedelta64[ns]`` can now be divided by another
+ ``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
+ is frequency conversion.
 
 **API Changes**
 

diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
@@ -170,7 +170,7 @@ Take care, ``to_datetime`` may not act as you expect on mixed data:
 
 .. ipython:: python
 
- pd.to_datetime([1, '1'])
+ to_datetime([1, '1'])
 
 .. _timeseries.daterange:
 
@@ -297,7 +297,7 @@ the year or year and month as strings:
 
  ts['2011-6']
 
-This type of slicing will work on a DataFrame with a ``DateTimeIndex`` as well. Since the 
+This type of slicing will work on a DataFrame with a ``DatetimeIndex`` as well. Since the
 partial string selection is a form of label slicing, the endpoints **will be** included. This
 would include matching times on an included date. Here's an example:
 
@@ -1112,7 +1112,8 @@ Time Deltas
 -----------
 
 Timedeltas are differences in times, expressed in different units, e.g. days, hours, minutes, seconds.
-They can be both positive and negative.
+They can be both positive and negative. :ref:`DateOffsets<timeseries.offsets>` that are absolute in nature
+(``Day, Hour, Minute, Second, Milli, Micro, Nano``) can be used as ``timedeltas``.
 
 .. ipython:: python
 
@@ -1128,41 +1129,16 @@ They can be both positive and negative.
  s - s.max()
  s - datetime(2011,1,1,3,5)
  s + timedelta(minutes=5)
+ s + Minute(5)
+ s + Minute(5) + Milli(5)
 
 Getting scalar results from a ``timedelta64[ns]`` series
 
-.. ipython:: python
- :suppress:
-
- from distutils.version import LooseVersion
-
 .. ipython:: python
 
  y = s - s[0]
  y
 
-.. code-block:: python
-
- if LooseVersion(np.__version__) <= '1.6.2':
- y.apply(lambda x: x.item().total_seconds())
- y.apply(lambda x: x.item().days)
- else:
- y.apply(lambda x: x / np.timedelta64(1, 's'))
- y.apply(lambda x: x / np.timedelta64(1, 'D'))
-
-.. note::
-
- As you can see from the conditional statement above, these operations are
- different in numpy 1.6.2 and in numpy >= 1.7. The ``timedelta64[ns]`` scalar
- type in 1.6.2 is much like a ``datetime.timedelta``, while in 1.7 it is a
- nanosecond based integer. A future version of pandas will make this
- transparent.
-
-.. note::
-
- In numpy >= 1.7 dividing a ``timedelta64`` array by another ``timedelta64``
- array will yield an array with dtype ``np.float64``.
-
 Series of timedeltas with ``NaT`` values are supported
 
 .. ipython:: python
@@ -1218,3 +1194,55 @@ issues). ``idxmin, idxmax`` are supported as well.
 
  df.min().idxmax()
  df.min(axis=1).idxmin()
+
+.. _timeseries.timedeltas_convert:
+
+Time Deltas & Conversions
+-------------------------
+
+.. versionadded:: 0.13
+
+Timedeltas can be converted to other 'frequencies' by dividing by another timedelta.
+These operations yield ``float64`` dtyped Series.
+
+.. ipython:: python
+
+ td = Series(date_range('20130101',periods=4))-Series(date_range('20121201',periods=4))
+ td[2] += np.timedelta64(timedelta(minutes=5,seconds=3))
+ td[3] = np.nan
+ td
+
+ # to days
+ td / np.timedelta64(1,'D')
+
+ # to seconds
+ td / np.timedelta64(1,'s')
+
+Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series
+yields another ``timedelta64[ns]`` dtyped Series.
+
+.. ipython:: python
+
+ td * -1
+ td * Series([1,2,3,4])
+
+Numpy < 1.7 Compatibility
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Numpy < 1.7 has a broken ``timedelta64`` type that does not correctly work
+for arithmetic. Pandas bypasses this, but for frequency conversion as above,
+you need to create the divisor yourself. The ``np.timedelta64`` type only
+takes a single argument, the number of **microseconds**.
+
+The following are equivalent statements in the two versions of numpy.
+
+.. code-block:: python
+
+ from distutils.version import LooseVersion
+ if LooseVersion(np.__version__) <= '1.6.2':
+ y / np.timedelta64(86400*int(1e6))
+ y / np.timedelta64(int(1e6))
+ else:
+ y / np.timedelta64(1,'D')
+ y / np.timedelta64(1,'s')
+
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -100,6 +100,40 @@ Enhancements
  - Added a more informative error message when plot arguments contain
  overlapping color and style arguments (:issue:`4402`)
 
+ - ``timedelta64[ns]`` operations
+
+ - A Series of dtype ``timedelta64[ns]`` can now be divided by another
+ ``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
+ is frequency conversion. See :ref:`here<timeseries.timedeltas_convert>` for the docs.
+
+ .. ipython:: python
+
+ from datetime import timedelta
+ td = Series(date_range('20130101',periods=4))-Series(date_range('20121201',periods=4))
+ td[2] += np.timedelta64(timedelta(minutes=5,seconds=3))
+ td[3] = np.nan
+ td
+
+ # to days
+ td / np.timedelta64(1,'D')
+
+ # to seconds
+ td / np.timedelta64(1,'s')
+
+ - Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series yields another ``timedelta64[ns]`` dtyped Series.
+
+ .. ipython:: python
+
+ td * -1
+ td * Series([1,2,3,4])
+
+ - Absolute ``DateOffset`` objects can act equivalently to ``timedeltas``
+
+ .. ipython:: python
+
+ from pandas import offsets
+ td + offsets.Minute(5) + offsets.Milli(5)
+
 Bug Fixes
 ~~~~~~~~~
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -85,13 +85,14 @@ def na_op(x, y):
  def wrapper(self, other, name=name):
  from pandas.core.frame import DataFrame
  dtype = None
+ fill_value = tslib.iNaT
  wrap_results = lambda x: x
 
  lvalues, rvalues = self, other
 
  is_timedelta_lhs = com.is_timedelta64_dtype(self)
  is_datetime_lhs = com.is_datetime64_dtype(self)
- is_integer_lhs = lvalues.dtype == np.int64
+ is_integer_lhs = lvalues.dtype.kind in ['i','u']
 
  if is_datetime_lhs or is_timedelta_lhs:
 
@@ -116,14 +117,19 @@ def convert_to_array(values):
  # py3 compat where dtype is 'm' but is an integer
  if values.dtype.kind == 'm':
  values = values.astype('timedelta64[ns]')
- elif name not in ['__div__','__mul__']:
- raise TypeError("incompatible type for a datetime/timedelta operation")
+ elif name not in ['__truediv__','__div__','__mul__']:
+ raise TypeError("incompatible type for a datetime/timedelta operation [{0}]".format(name))
  elif isinstance(values[0],DateOffset):
  # handle DateOffsets
- values = pa.array([ v.delta for v in values ])
- values = com._possibly_cast_to_timedelta(values, coerce=coerce)
+ os = pa.array([ getattr(v,'delta',None) for v in values ])
+ mask = isnull(os)
+ if mask.any():
+ raise TypeError("cannot use a non-absolute DateOffset in "
+ "datetime/timedelta operations [{0}]".format(','.join([ com.pprint_thing(v) for v in values[mask] ])))
+ values = com._possibly_cast_to_timedelta(os, coerce=coerce)
  else:
- values = pa.array(values)
+ raise TypeError("incompatible type [{0}] for a datetime/timedelta operation".format(pa.array(values).dtype))
+
  return values
 
  # convert lhs and rhs
@@ -132,32 +138,51 @@ def convert_to_array(values):
 
  is_datetime_rhs = com.is_datetime64_dtype(rvalues)
  is_timedelta_rhs = com.is_timedelta64_dtype(rvalues) or (not is_datetime_rhs and _np_version_under1p7)
- is_integer_rhs = rvalues.dtype == np.int64
+ is_integer_rhs = rvalues.dtype.kind in ['i','u']
  mask = None
 
  # timedelta and integer mul/div
  if (is_timedelta_lhs and is_integer_rhs) or (is_integer_lhs and is_timedelta_rhs):
 
- if name not in ['__div__','__mul__']:
+ if name not in ['__truediv__','__div__','__mul__']:
  raise TypeError("can only operate on a timedelta and an integer for "
  "division, but the operator [%s] was passed" % name)
  dtype = 'timedelta64[ns]'
  mask = isnull(lvalues) | isnull(rvalues)
  lvalues = lvalues.astype(np.int64)
  rvalues = rvalues.astype(np.int64)
 
- # 2 datetimes or 2 timedeltas
- elif (is_timedelta_lhs and is_timedelta_rhs) or (is_datetime_lhs and
- is_datetime_rhs):
- if is_datetime_lhs and name != '__sub__':
+ # 2 datetimes
+ elif is_datetime_lhs and is_datetime_rhs:
+ if name != '__sub__':
  raise TypeError("can only operate on a datetimes for subtraction, "
  "but the operator [%s] was passed" % name)
- elif is_timedelta_lhs and name not in ['__add__','__sub__']:
- raise TypeError("can only operate on a timedeltas for "
- "addition and subtraction, but the operator [%s] was passed" % name)
 
  dtype = 'timedelta64[ns]'
  mask = isnull(lvalues) | isnull(rvalues)
+ lvalues = lvalues.view('i8')
+ rvalues = rvalues.view('i8')
+
+ # 2 timedeltas
+ elif is_timedelta_lhs and is_timedelta_rhs:
+ mask = isnull(lvalues) | isnull(rvalues)
+
+ # time delta division -> unit less
+ if name in ['__div__','__truediv__']:
+ dtype = 'float64'
+ fill_value = np.nan
+ lvalues = lvalues.astype(np.int64).astype(np.float64)
+ rvalues = rvalues.astype(np.int64).astype(np.float64)
+
+ # another timedelta
+ elif name in ['__add__','__sub__']:
+ dtype = 'timedelta64[ns]'
+ lvalues = lvalues.astype(np.int64)
+ rvalues = rvalues.astype(np.int64)
+
+ else:
+ raise TypeError("can only operate on a timedeltas for "
+ "addition, subtraction, and division, but the operator [%s] was passed" % name)
 
  # datetime and timedelta
  elif is_timedelta_rhs and is_datetime_lhs:
@@ -166,13 +191,17 @@ def convert_to_array(values):
  raise TypeError("can only operate on a datetime with a rhs of a timedelta for "
  "addition and subtraction, but the operator [%s] was passed" % name)
  dtype = 'M8[ns]'
+ lvalues = lvalues.view('i8')
+ rvalues = rvalues.view('i8')
 
  elif is_timedelta_lhs and is_datetime_rhs:
 
  if name not in ['__add__']:
  raise TypeError("can only operate on a timedelta and a datetime for "
  "addition, but the operator [%s] was passed" % name)
  dtype = 'M8[ns]'
+ lvalues = lvalues.view('i8')
+ rvalues = rvalues.view('i8')
 
  else:
  raise TypeError('cannot operate on a series with out a rhs '
@@ -183,14 +212,11 @@ def convert_to_array(values):
  if mask is not None:
  if mask.any():
  def f(x):
- x = pa.array(x,dtype='timedelta64[ns]')
- np.putmask(x,mask,tslib.iNaT)
+ x = pa.array(x,dtype=dtype)
+ np.putmask(x,mask,fill_value)
  return x
  wrap_results = f
 
- lvalues = lvalues.view('i8')
- rvalues = rvalues.view('i8')
-
  if isinstance(rvalues, Series):
 
  if hasattr(lvalues,'values'):