Skip to content

Commit

Permalink
Backport PR pandas-dev#42318: PERF/REGR: symmetric_difference revert …
Browse files Browse the repository at this point in the history
…most of pandas-dev#41833
  • Loading branch information
jbrockmendel authored and simonjayhawkins committed Jun 30, 2021
1 parent f1d1367 commit 77f3cf4
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 6 deletions.
45 changes: 40 additions & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3254,12 +3254,47 @@ def symmetric_difference(self, other, result_name=None, sort=None):
if result_name is None:
result_name = result_name_update

left = self.difference(other, sort=False)
right = other.difference(self, sort=False)
result = left.union(right, sort=sort)
if not self._should_compare(other):
return self.union(other, sort=sort).rename(result_name)

elif not is_dtype_equal(self.dtype, other.dtype):
dtype = self._find_common_type_compat(other)
this = self.astype(dtype, copy=False)
that = other.astype(dtype, copy=False)
return this.symmetric_difference(that, sort=sort).rename(result_name)

this = self.unique()
other = other.unique()
indexer = this.get_indexer_for(other)

# {this} minus {other}
common_indexer = indexer.take((indexer != -1).nonzero()[0])
left_indexer = np.setdiff1d(
np.arange(this.size), common_indexer, assume_unique=True
)
left_diff = this._values.take(left_indexer)

# {other} minus {this}
right_indexer = (indexer == -1).nonzero()[0]
right_diff = other._values.take(right_indexer)

res_values = concat_compat([left_diff, right_diff])
res_values = _maybe_try_sort(res_values, sort)

result = Index(res_values, name=result_name)

if self._is_multi:
self = cast("MultiIndex", self)
if len(result) == 0:
# On equal symmetric_difference MultiIndexes the difference is empty.
# Therefore, an empty MultiIndex is returned GH#13490
return type(self)(
levels=[[] for _ in range(self.nlevels)],
codes=[[] for _ in range(self.nlevels)],
names=result.name,
)
return type(self).from_tuples(result, names=result.name)

if result_name is not None:
result = result.rename(result_name)
return result

@final
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
inherit_names,
make_wrapped_arith_op,
)
from pandas.core.indexes.numeric import Int64Index
from pandas.core.tools.timedeltas import to_timedelta

if TYPE_CHECKING:
Expand Down Expand Up @@ -784,7 +785,11 @@ def _union(self, other, sort):
# that result.freq == self.freq
return result
else:
return super()._union(other, sort=sort)._with_freq("infer")
i8self = Int64Index._simple_new(self.asi8)
i8other = Int64Index._simple_new(other.asi8)
i8result = i8self._union(i8other, sort=sort)
result = type(self)(i8result, dtype=self.dtype, freq="infer")
return result

# --------------------------------------------------------------------
# Join Methods
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,18 @@ def _difference(self, other, sort=None):
new_index = new_index[::-1]
return new_index

def symmetric_difference(self, other, result_name: Hashable = None, sort=None):
    """
    Return the elements that appear in exactly one of ``self`` and ``other``.

    Parameters
    ----------
    other : Index or array-like
        The object to compare against.
    result_name : Hashable, optional
        If not None, the result is renamed to this value before returning.
    sort : optional
        Any value other than None sends the call to the parent class
        implementation.

    Returns
    -------
    Index
        Union of ``self.difference(other)`` and ``other.difference(self)``
        when ``other`` is a RangeIndex and ``sort`` is None; otherwise
        whatever the parent class implementation returns.
    """
    # Only the (RangeIndex, RangeIndex, sort=None) combination is handled
    # here; every other case is delegated unchanged to the parent class.
    if sort is not None or not isinstance(other, RangeIndex):
        return super().symmetric_difference(other, result_name, sort)

    # NOTE(review): presumably this route exists so the range-specific
    # difference/union implementations are used -- confirm against the PR.
    only_in_self = self.difference(other)
    only_in_other = other.difference(self)
    combined = only_in_self.union(only_in_other)

    return combined if result_name is None else combined.rename(result_name)

# --------------------------------------------------------------------

def _concat(self, indexes: list[Index], name: Hashable) -> Index:
Expand Down

0 comments on commit 77f3cf4

Please sign in to comment.