Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
BUG: Try to sort result of Index.union rather than guessing sortability
closes #17376
  • Loading branch information
toobaz authored and gfyoung committed Mar 10, 2018
commit 9fceb515f545f4f75a7f91aa388bca968a96631d
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,7 @@ Numeric
Indexing
^^^^^^^^

- Bug in the order of the result of ``Index.union()`` when indexes contain tuples (:issue:`17376`)
- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`)
- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`)
- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`)
Expand Down
28 changes: 7 additions & 21 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2406,35 +2406,21 @@ def union(self, other):
value_set = set(lvals)
result.extend([x for x in rvals if x not in value_set])
else:
indexer = self.get_indexer(other)
indexer, = (indexer == -1).nonzero()

indexer = np.where(self.get_indexer(other) == -1)[0]
if len(indexer) > 0:
other_diff = algos.take_nd(rvals, indexer,
allow_fill=False)
result = _concat._concat_compat((lvals, other_diff))

try:
lvals[0] < other_diff[0]
except TypeError as e:
warnings.warn("%s, sort order is undefined for "
"incomparable objects" % e, RuntimeWarning,
stacklevel=3)
else:
types = frozenset((self.inferred_type,
other.inferred_type))
if not types & _unsortable_types:
result.sort()

else:
result = lvals

try:
result = np.sort(result)
except TypeError as e:
warnings.warn("%s, sort order is undefined for "
"incomparable objects" % e, RuntimeWarning,
stacklevel=3)
try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you probably can simply use core.sorting.safe_sort here instead and just remove the warning entirely (as it fails in many less cases)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, but I left the warning as it's still raised if not even safe_sort can sort them.

(I assume the failure is a problem with AppVeyor)

result = sorting.safe_sort(result)
except TypeError as e:
warnings.warn("%s, sort order is undefined for "
"incomparable objects" % e, RuntimeWarning,
stacklevel=3)

# for subclasses
return self._wrap_union_result(other, result)
Expand Down
51 changes: 20 additions & 31 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,8 +784,7 @@ def test_union(self):
expected = Index(list('ab'), name='A')
tm.assert_index_equal(union, expected)

with tm.assert_produces_warning(RuntimeWarning):
firstCat = self.strIndex.union(self.dateIndex)
firstCat = self.strIndex.union(self.dateIndex)
secondCat = self.strIndex.union(self.strIndex)

if self.dateIndex.dtype == np.object_:
Expand Down Expand Up @@ -1462,19 +1461,19 @@ def test_tuple_union_bug(self):
(2, 'B'), (1, 'C'), (2, 'C')],
dtype=[('num', int), ('let', 'a1')])

idx1 = pandas.Index(aidx1)
idx2 = pandas.Index(aidx2)
idx1 = Index(aidx1)
idx2 = Index(aidx2)

# intersection broken?
# intersection
int_idx = idx1.intersection(idx2)
expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2)))
# needs to be 1d like idx1 and idx2
expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
assert int_idx.ndim == 1
tm.assert_index_equal(int_idx, expected)

# union broken
# GH 17376 (union)
union_idx = idx1.union(idx2)
expected = idx2
expected = idx2.sort_values()
assert union_idx.ndim == 1
tm.assert_index_equal(union_idx, expected)

Expand Down Expand Up @@ -1664,13 +1663,19 @@ def test_outer_join_sort(self):
left_idx = Index(np.random.permutation(15))
right_idx = tm.makeDateIndex(10)

with tm.assert_produces_warning(RuntimeWarning):
if PY3:
with tm.assert_produces_warning(RuntimeWarning):
joined = left_idx.join(right_idx, how='outer')
else:
joined = left_idx.join(right_idx, how='outer')

# right_idx in this case because DatetimeIndex has join precedence over
# Int64Index
with tm.assert_produces_warning(RuntimeWarning):
expected = right_idx.astype(object).union(left_idx.astype(object))
if PY3:
with tm.assert_produces_warning(RuntimeWarning):
expected = right_idx.astype(object).union(left_idx)
else:
expected = right_idx.astype(object).union(left_idx)
tm.assert_index_equal(joined, expected)

def test_nan_first_take_datetime(self):
Expand Down Expand Up @@ -2059,10 +2064,7 @@ def test_copy_name(self):
s1 = Series(2, index=first)
s2 = Series(3, index=second[:-1])

warning_type = RuntimeWarning if PY3 else None
with tm.assert_produces_warning(warning_type):
# Python 3: Unorderable types
s3 = s1 * s2
s3 = s1 * s2

assert s3.index.name == 'mario'

Expand Down Expand Up @@ -2095,27 +2097,14 @@ def test_union_base(self):
first = idx[3:]
second = idx[:5]

if PY3:
with tm.assert_produces_warning(RuntimeWarning):
# unorderable types
result = first.union(second)
expected = Index(['b', 2, 'c', 0, 'a', 1])
tm.assert_index_equal(result, expected)
else:
result = first.union(second)
expected = Index(['b', 2, 'c', 0, 'a', 1])
tm.assert_index_equal(result, expected)
expected = Index([0, 1, 2, 'a', 'b', 'c'])
result = first.union(second)
tm.assert_index_equal(result, expected)

# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if PY3:
with tm.assert_produces_warning(RuntimeWarning):
# unorderable types
result = first.union(case)
assert tm.equalContents(result, idx)
else:
result = first.union(case)
assert tm.equalContents(result, idx)

Expand Down
26 changes: 5 additions & 21 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,11 +431,7 @@ def test_comparison_label_based(self):
assert_series_equal(result, a[a])

for e in [Series(['z'])]:
if compat.PY3:
with tm.assert_produces_warning(RuntimeWarning):
result = a[a | e]
else:
result = a[a | e]
result = a[a | e]
assert_series_equal(result, a[a])

# vs scalars
Expand Down Expand Up @@ -1472,24 +1468,12 @@ def test_operators_bitwise(self):
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])

# s_0123 will be all false now because of reindexing like s_tft
if compat.PY3:
# unable to sort incompatible object via .union.
exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
with tm.assert_produces_warning(RuntimeWarning):
assert_series_equal(s_tft & s_0123, exp)
else:
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_tft & s_0123, exp)
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_tft & s_0123, exp)

# s_tft will be all false now because of reindexing like s_0123
if compat.PY3:
# unable to sort incompatible object via .union.
exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
with tm.assert_produces_warning(RuntimeWarning):
assert_series_equal(s_0123 & s_tft, exp)
else:
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_0123 & s_tft, exp)
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_0123 & s_tft, exp)

assert_series_equal(s_0123 & False, Series([False] * 4))
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
Expand Down