17 changes: 16 additions & 1 deletion doc/source/release.rst
@@ -107,6 +107,10 @@ API Changes
or numbering columns as needed (:issue:`2385`)
- Slicing and advanced/boolean indexing operations on ``Index`` classes will no
longer change type of the resulting index (:issue:`6440`).
- ``set_index`` no longer converts MultiIndexes to an Index of tuples (:issue:`6459`).
- Slicing with negative start, stop & step values handles corner cases better (:issue:`6531`):
- ``df.iloc[:-len(df)]`` is now empty
- ``df.iloc[len(df)::-1]`` now enumerates all elements in reverse
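The two corner cases listed above can be demonstrated on a small illustrative frame (a sketch for this changelog entry, not taken from the release notes themselves):

```python
import pandas as pd

df = pd.DataFrame({'a': [0, 1, 2]})

# Slicing up to -len(df) now degenerates to an empty selection ...
empty = df.iloc[:-len(df)]

# ... while starting at len(df) with step -1 clips the out-of-bounds
# start (numpy-style) and enumerates all rows in reverse.
reversed_df = df.iloc[len(df)::-1]
```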

Experimental Features
~~~~~~~~~~~~~~~~~~~~~
@@ -139,6 +143,7 @@ Improvements to existing features
Bug Fixes
~~~~~~~~~

- Bug in ``Series`` raising a ``ValueError`` when the index doesn't match the data (:issue:`6532`)
- Bug in ``pd.DataFrame.sort_index`` where mergesort wasn't stable when ``ascending=False`` (:issue:`6399`)
- Bug in ``pd.tseries.frequencies.to_offset`` when argument has leading zeroes (:issue:`6391`)
- Bug in version string generation for dev versions with shallow clones / install from tarball (:issue:`6127`)
@@ -180,7 +185,7 @@ Bug Fixes
- Bug in :meth:`DataFrame.replace` where nested dicts were erroneously
depending on the order of dictionary keys and values (:issue:`5338`).
- Perf issue in concatting with empty objects (:issue:`3259`)
- Clarify sorting of ``sym_diff`` on ``Index``es with ``NaN``s (:isssue:`6444`)
- Clarify sorting of ``sym_diff`` on ``Index``es with ``NaN``s (:issue:`6444`)
- Regression in ``MultiIndex.from_product`` with a ``DatetimeIndex`` as input (:issue:`6439`)
- Bug in ``str.extract`` when passed a non-default index (:issue:`6348`)
- Bug in ``str.split`` when passed ``pat=None`` and ``n=1`` (:issue:`6466`)
@@ -194,6 +199,16 @@ Bug Fixes
- Bug in ``read_html`` tests where redirected invalid URLs would make one test
fail (:issue:`6445`).
- Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`)
- Bug that caused ``_ref_locs`` corruption when slice indexing across the columns axis of a DataFrame (:issue:`6525`)
- Regression from 0.13 in the treatment of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`)
- ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`).
- Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`)
- Bug in setitem with loc on mixed integer Indexes (:issue:`6546`)
- Bug in ``pd.read_stata`` which would use the wrong data types and missing values (:issue:`6327`)
- Bug in ``DataFrame.to_stata`` that led to data loss in certain cases, and could export data using the
  wrong data types and missing values (:issue:`6335`)
- Inconsistent types in Timestamp addition/subtraction (:issue:`6543`)
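The ``set_index`` name-preservation fix noted in the list above (:issue:`6459`) can be sketched as follows; the frame contents and level names here are illustrative:

```python
import pandas as pd

# A MultiIndex with explicit level names.
mi = pd.MultiIndex.from_tuples(
    [('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd')],
    names=['outer', 'inner'])
df = pd.DataFrame({'x': range(4)})

# Passing an existing MultiIndex to set_index now keeps its level names
# instead of discarding them.
result = df.set_index(mi)
```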


pandas 0.13.1
-------------
46 changes: 46 additions & 0 deletions doc/source/v0.14.0.txt
@@ -92,6 +92,49 @@ These are out-of-bounds selections
.. ipython:: python

i[[0,1,2]].astype(np.int_)
- ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example,
the old behavior returned an Index in this case (:issue:`6459`):

.. ipython:: python
:suppress:

from itertools import product
tuples = list(product(('a', 'b'), ('c', 'd')))
mi = MultiIndex.from_tuples(tuples)
df_multi = DataFrame(np.random.randn(4, 2), index=mi)
tuple_ind = pd.Index(tuples)

.. ipython:: python

df_multi.index

@suppress
df_multi.index = tuple_ind

# Old behavior, cast MultiIndex to an Index
df_multi.set_index(df_multi.index)

@suppress
df_multi.index = mi

# New behavior
df_multi.set_index(df_multi.index)

This also applies when passing multiple indices to ``set_index``:

.. ipython:: python

@suppress
df_multi.index = tuple_ind

# Old output, 2-level MultiIndex of tuples
df_multi.set_index([df_multi.index, df_multi.index])

@suppress
df_multi.index = mi

# New output, 4-level MultiIndex
df_multi.set_index([df_multi.index, df_multi.index])


MultiIndexing Using Slicers
@@ -248,6 +291,9 @@ Enhancements
using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`)
- Added a ``to_julian_date`` function to ``TimeStamp`` and ``DatetimeIndex``
to convert to the Julian Date used primarily in astronomy. (:issue:`4041`)
- ``DataFrame.to_stata`` will now check data for compatibility with Stata data types
and will upcast when needed. When it isn't possible to upcast losslessly, a warning
is raised (:issue:`6327`)
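A minimal round-trip sketch of the ``to_stata``/``read_stata`` behavior described above; the file name and data are illustrative, and the compatibility warning itself only fires when a lossless upcast is impossible:

```python
import os
import tempfile

import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3]})

# Round-trip through the Stata .dta format; integer columns may be
# upcast to a Stata-compatible type, but the values are preserved.
with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, 'demo.dta')
    df.to_stata(path)
    back = pd.read_stata(path)
```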

Performance
~~~~~~~~~~~
9 changes: 3 additions & 6 deletions pandas/core/common.py
@@ -124,7 +124,7 @@ def isnull(obj):

See also
--------
pandas.notnull: boolean inverse of pandas.isnull
pandas.notnull: boolean inverse of pandas.isnull
"""
return _isnull(obj)

@@ -272,7 +272,7 @@ def notnull(obj):
isnulled : array-like of bool or bool
Array or bool indicating whether an object is *not* null or if an array
is given which of the element is *not* null.

See also
--------
pandas.isnull : boolean inverse of pandas.notnull
@@ -1727,10 +1727,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
dtype = value.dtype

if dtype.kind == 'M' and dtype != _NS_DTYPE:
try:
value = tslib.array_to_datetime(value)
except:
raise
value = value.astype(_NS_DTYPE)

elif dtype.kind == 'm' and dtype != _TD_DTYPE:
from pandas.tseries.timedeltas import \
15 changes: 9 additions & 6 deletions pandas/core/frame.py
@@ -1867,11 +1867,6 @@ def eval(self, expr, **kwargs):
kwargs['resolvers'] = kwargs.get('resolvers', ()) + resolvers
return _eval(expr, **kwargs)

def _slice(self, slobj, axis=0, raise_on_error=False, typ=None):
axis = self._get_block_manager_axis(axis)
new_data = self._data.get_slice(
slobj, axis=axis, raise_on_error=raise_on_error)
return self._constructor(new_data)

def _box_item_values(self, key, values):
items = self.columns[self.columns.get_loc(key)]
@@ -2240,7 +2235,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False,

to_remove = []
for col in keys:
if isinstance(col, Series):
if isinstance(col, MultiIndex):
# append all but the last column so we don't have to modify
# the end of this loop
for n in range(col.nlevels - 1):
arrays.append(col.get_level_values(n))

level = col.get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, (Series, Index)):
level = col.values
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
10 changes: 10 additions & 0 deletions pandas/core/generic.py
@@ -1079,6 +1079,16 @@ def _clear_item_cache(self, i=None):
else:
self._item_cache.clear()

def _slice(self, slobj, axis=0, typ=None):
"""
Construct a slice of this container.

typ parameter is maintained for compatibility with Series slicing.

"""
axis = self._get_block_manager_axis(axis)
return self._constructor(self._data.get_slice(slobj, axis=axis))

def _set_item(self, key, value):
self._data.set(key, value)
self._clear_item_cache()
32 changes: 30 additions & 2 deletions pandas/core/index.py
@@ -555,6 +555,29 @@ def _convert_list_indexer(self, key, typ=None):
""" convert a list indexer. these should be locations """
return key

def _convert_list_indexer_for_mixed(self, keyarr, typ=None):
""" passed a key that is tuplesafe that is integer based
and we have a mixed index (e.g. number/labels). figure out
the indexer. return None if we can't help
"""
if com.is_integer_dtype(keyarr) and not self.is_floating():
if self.inferred_type != 'integer':
keyarr = np.where(keyarr < 0,
len(self) + keyarr, keyarr)

if self.inferred_type == 'mixed-integer':
indexer = self.get_indexer(keyarr)
if (indexer >= 0).all():
return indexer

from pandas.core.indexing import _maybe_convert_indices
return _maybe_convert_indices(indexer, len(self))

elif not self.inferred_type == 'integer':
return keyarr

return None
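The negative-label normalization step in the method above (the ``np.where`` over ``keyarr``) can be illustrated in isolation; ``normalize_negative_labels`` is a hypothetical helper for this sketch, not part of pandas:

```python
import numpy as np

def normalize_negative_labels(keyarr, n):
    """Map negative integer labels to their positive positions,
    mirroring the np.where step in _convert_list_indexer_for_mixed."""
    keyarr = np.asarray(keyarr)
    return np.where(keyarr < 0, n + keyarr, keyarr)

# For an index of length 5, -1 maps to position 4.
normalized = normalize_negative_labels([-1, 0, 2], 5)
```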

def _convert_indexer_error(self, key, msg=None):
if msg is None:
msg = 'label'
@@ -987,8 +1010,13 @@ def intersection(self, other):
except TypeError:
pass

indexer = self.get_indexer(other.values)
indexer = indexer.take((indexer != -1).nonzero()[0])
try:
indexer = self.get_indexer(other.values)
indexer = indexer.take((indexer != -1).nonzero()[0])
except:
# duplicates
indexer = self.get_indexer_non_unique(other.values)[0].unique()

return self.take(indexer)

def diff(self, other):
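The fallback added to ``intersection`` above targets indexes containing duplicates; a quick illustration of the user-facing operation (this shows ``Index.intersection`` generally, not the internal ``get_indexer_non_unique`` path):

```python
import pandas as pd

# A non-unique index: 'a' appears twice.
left = pd.Index(['a', 'a', 'b', 'c'])

# Intersection with another index still succeeds despite the duplicates.
result = left.intersection(pd.Index(['a', 'b']))
```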
73 changes: 13 additions & 60 deletions pandas/core/indexing.py
@@ -91,32 +91,8 @@ def _get_label(self, label, axis=0):
def _get_loc(self, key, axis=0):
return self.obj._ixs(key, axis=axis)

def _slice(self, obj, axis=0, raise_on_error=False, typ=None):

# make out-of-bounds into bounds of the object
if typ == 'iloc':
ax = self.obj._get_axis(axis)
l = len(ax)
start = obj.start
stop = obj.stop
step = obj.step
if start is not None:
# degenerate to return nothing
if start >= l:
return self._getitem_axis(tuple(),axis=axis)

# equiv to a null slice
elif start <= -l:
start = None
if stop is not None:
if stop > l:
stop = None
elif stop <= -l:
stop = None
obj = slice(start,stop,step)

return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error,
typ=typ)
def _slice(self, obj, axis=0, typ=None):
return self.obj._slice(obj, axis=axis, typ=typ)

def __setitem__(self, key, value):

@@ -441,7 +417,9 @@ def can_do_equal_len():
# align to
if item in value:
v = value[item]
v = v.reindex(self.obj[item].index & v.index)
i = self.obj[item].index
v = v.reindex(i & v.index)

setter(item, v.values)
else:
setter(item, np.nan)
@@ -909,20 +887,10 @@ def _reindex(keys, level=None):
# asarray can be unsafe, NumPy strings are weird
keyarr = _asarray_tuplesafe(key)

if is_integer_dtype(keyarr) and not labels.is_floating():
if labels.inferred_type != 'integer':
keyarr = np.where(keyarr < 0,
len(labels) + keyarr, keyarr)

if labels.inferred_type == 'mixed-integer':
indexer = labels.get_indexer(keyarr)
if (indexer >= 0).all():
self.obj.take(indexer, axis=axis, convert=True)
else:
return self.obj.take(keyarr, axis=axis)
elif not labels.inferred_type == 'integer':

return self.obj.take(keyarr, axis=axis)
# handle a mixed integer scenario
indexer = labels._convert_list_indexer_for_mixed(keyarr, typ=self.name)
if indexer is not None:
return self.obj.take(indexer, axis=axis)

# this is not the most robust, but...
if (isinstance(labels, MultiIndex) and
@@ -1062,11 +1030,9 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
objarr = _asarray_tuplesafe(obj)

# If have integer labels, defer to label-based indexing
if is_integer_dtype(objarr) and not is_int_index:
if labels.inferred_type != 'integer':
objarr = np.where(objarr < 0,
len(labels) + objarr, objarr)
return objarr
indexer = labels._convert_list_indexer_for_mixed(objarr, typ=self.name)
if indexer is not None:
return indexer

# this is not the most robust, but...
if (isinstance(labels, MultiIndex) and
@@ -1353,8 +1319,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
return obj

if isinstance(slice_obj, slice):
return self._slice(slice_obj, axis=axis, raise_on_error=True,
typ='iloc')
return self._slice(slice_obj, axis=axis, typ='iloc')
else:
return self.obj.take(slice_obj, axis=axis, convert=False)

@@ -1657,18 +1622,6 @@ def _need_slice(obj):
(obj.step is not None and obj.step != 1))


def _check_slice_bounds(slobj, values):
l = len(values)
start = slobj.start
if start is not None:
if start < -l or start > l - 1:
raise IndexError("out-of-bounds on slice (start)")
stop = slobj.stop
if stop is not None:
if stop < -l - 1 or stop > l:
raise IndexError("out-of-bounds on slice (end)")


def _maybe_droplevels(index, key):
# drop levels
original_index = index