From c01c19ac571e6a49fdbf00901389f4254eda7197 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Oct 2018 17:34:57 -0700 Subject: [PATCH 1/9] collect dispatch functions in one place --- pandas/core/ops.py | 281 +++++++++++++++++++++++---------------------- 1 file changed, 141 insertions(+), 140 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 20559bca9caed..3fcedc9c3007f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -936,6 +936,147 @@ def should_series_dispatch(left, right, op): return False +def dispatch_to_series(left, right, func, str_rep=None, axis=None): + """ + Evaluate the frame operation func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + left : DataFrame + right : scalar or DataFrame + func : arithmetic or comparison operator + str_rep : str or None, default None + axis : {None, 0, 1, "index", "columns"} + + Returns + ------- + DataFrame + """ + # Note: we use iloc to access columns for compat with cases + # with non-unique columns. + import pandas.core.computation.expressions as expressions + + right = lib.item_from_zerodim(right) + if lib.is_scalar(right): + + def column_op(a, b): + return {i: func(a.iloc[:, i], b) + for i in range(len(a.columns))} + + elif isinstance(right, ABCDataFrame): + assert right._indexed_same(left) + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[:, i]) + for i in range(len(a.columns))} + + elif isinstance(right, ABCSeries) and axis == "columns": + # We only get here if called via left._combine_match_columns, + # in which case we specifically want to operate row-by-row + assert right.index.equals(left.columns) + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[i]) + for i in range(len(a.columns))} + + elif isinstance(right, ABCSeries): + assert right.index.equals(left.index) # Handle other cases later + + def column_op(a, b): + return {i: func(a.iloc[:, i], b) + for i in range(len(a.columns))} + + else: + # Remaining cases have less-obvious dispatch rules + raise NotImplementedError(right) + + new_data = expressions.evaluate(column_op, str_rep, left, right) + + result = left._constructor(new_data, index=left.index, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + result.columns = left.columns + return result + + +def dispatch_to_index_op(op, left, right, index_class): + """ + Wrap Series left in the given index_class to delegate the operation op + to the index implementation. DatetimeIndex and TimedeltaIndex perform + type checking, timezone handling, overflow checks, etc. + + Parameters + ---------- + op : binary operator (operator.add, operator.sub, ...) + left : Series + right : object + index_class : DatetimeIndex or TimedeltaIndex + + Returns + ------- + result : object, usually DatetimeIndex, TimedeltaIndex, or Series + """ + left_idx = index_class(left) + + # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, + # left_idx may inherit a freq from a cached DatetimeIndex. + # See discussion in GH#19147. + if getattr(left_idx, 'freq', None) is not None: + left_idx = left_idx._shallow_copy(freq=None) + try: + result = op(left_idx, right) + except NullFrequencyError: + # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError + # on add/sub of integers (or int-like). We re-raise as a TypeError. + raise TypeError('incompatible type for a datetime/timedelta ' + 'operation [{name}]'.format(name=op.__name__)) + return result + + +def dispatch_to_extension_op(op, left, right): + """ + Assume that left or right is a Series backed by an ExtensionArray, + apply the operator defined by op. + """ + + # The op calls will raise TypeError if the op is not defined + # on the ExtensionArray + # TODO(jreback) + # we need to listify to avoid ndarray, or non-same-type extension array + # dispatching + + if is_extension_array_dtype(left): + + new_left = left.values + if isinstance(right, np.ndarray): + + # handle numpy scalars, this is a PITA + # TODO(jreback) + new_right = lib.item_from_zerodim(right) + if is_scalar(new_right): + new_right = [new_right] + new_right = list(new_right) + elif is_extension_array_dtype(right) and type(left) != type(right): + new_right = list(right) + else: + new_right = right + + else: + + new_left = list(left.values) + new_right = right + + res_values = op(new_left, new_right) + res_name = get_op_result_name(left, right) + + if op.__name__ == 'divmod': + return _construct_divmod_result( + left, res_values, left.index, res_name) + + return _construct_result(left, res_values, left.index, res_name) + + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -1194,49 +1335,6 @@ def _construct_divmod_result(left, result, index, name, dtype=None): ) -def dispatch_to_extension_op(op, left, right): - """ - Assume that left or right is a Series backed by an ExtensionArray, - apply the operator defined by op. - """ - - # The op calls will raise TypeError if the op is not defined - # on the ExtensionArray - # TODO(jreback) - # we need to listify to avoid ndarray, or non-same-type extension array - # dispatching - - if is_extension_array_dtype(left): - - new_left = left.values - if isinstance(right, np.ndarray): - - # handle numpy scalars, this is a PITA - # TODO(jreback) - new_right = lib.item_from_zerodim(right) - if is_scalar(new_right): - new_right = [new_right] - new_right = list(new_right) - elif is_extension_array_dtype(right) and type(left) != type(right): - new_right = list(right) - else: - new_right = right - - else: - - new_left = list(left.values) - new_right = right - - res_values = op(new_left, new_right) - res_name = get_op_result_name(left, right) - - if op.__name__ == 'divmod': - return _construct_divmod_result( - left, res_values, left.index, res_name) - - return _construct_result(left, res_values, left.index, res_name) - - def _arith_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid @@ -1334,40 +1432,6 @@ def wrapper(left, right): return wrapper -def dispatch_to_index_op(op, left, right, index_class): - """ - Wrap Series left in the given index_class to delegate the operation op - to the index implementation. DatetimeIndex and TimedeltaIndex perform - type checking, timezone handling, overflow checks, etc. - - Parameters - ---------- - op : binary operator (operator.add, operator.sub, ...) - left : Series - right : object - index_class : DatetimeIndex or TimedeltaIndex - - Returns - ------- - result : object, usually DatetimeIndex, TimedeltaIndex, or Series - """ - left_idx = index_class(left) - - # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, - # left_idx may inherit a freq from a cached DatetimeIndex. - # See discussion in GH#19147. - if getattr(left_idx, 'freq', None) is not None: - left_idx = left_idx._shallow_copy(freq=None) - try: - result = op(left_idx, right) - except NullFrequencyError: - # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError - # on add/sub of integers (or int-like). We re-raise as a TypeError. - raise TypeError('incompatible type for a datetime/timedelta ' - 'operation [{name}]'.format(name=op.__name__)) - return result - - def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) @@ -1666,69 +1730,6 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # ----------------------------------------------------------------------------- # DataFrame -def dispatch_to_series(left, right, func, str_rep=None, axis=None): - """ - Evaluate the frame operation func(left, right) by evaluating - column-by-column, dispatching to the Series implementation. - - Parameters - ---------- - left : DataFrame - right : scalar or DataFrame - func : arithmetic or comparison operator - str_rep : str or None, default None - axis : {None, 0, 1, "index", "columns"} - - Returns - ------- - DataFrame - """ - # Note: we use iloc to access columns for compat with cases - # with non-unique columns. - import pandas.core.computation.expressions as expressions - - right = lib.item_from_zerodim(right) - if lib.is_scalar(right): - - def column_op(a, b): - return {i: func(a.iloc[:, i], b) - for i in range(len(a.columns))} - - elif isinstance(right, ABCDataFrame): - assert right._indexed_same(left) - - def column_op(a, b): - return {i: func(a.iloc[:, i], b.iloc[:, i]) - for i in range(len(a.columns))} - - elif isinstance(right, ABCSeries) and axis == "columns": - # We only get here if called via left._combine_match_columns, - # in which case we specifically want to operate row-by-row - assert right.index.equals(left.columns) - - def column_op(a, b): - return {i: func(a.iloc[:, i], b.iloc[i]) - for i in range(len(a.columns))} - - elif isinstance(right, ABCSeries): - assert right.index.equals(left.index) # Handle other cases later - - def column_op(a, b): - return {i: func(a.iloc[:, i], b) - for i in range(len(a.columns))} - - else: - # Remaining cases have less-obvious dispatch rules - raise NotImplementedError(right) - - new_data = expressions.evaluate(column_op, str_rep, left, right) - - result = left._constructor(new_data, index=left.index, copy=False) - # Pin columns instead of passing to constructor for compat with - # non-unique columns case - result.columns = left.columns - return result - def _combine_series_frame(self, other, func, fill_value=None, axis=None, level=None, try_cast=True): From f0e0a4e300fabf96f22f1597a5731f24c4f9f9aa Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Oct 2018 17:42:33 -0700 Subject: [PATCH 2/9] remove unused try_cast args; try to make SparseDataFrame methods more like regular methods --- pandas/core/frame.py | 2 +- pandas/core/ops.py | 15 ++++++--------- pandas/core/sparse/frame.py | 19 +++++++------------ 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 986fe347898f5..98d4f933cee81 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4937,7 +4937,7 @@ def _combine_match_index(self, other, func, level=None): index=left.index, columns=self.columns, copy=False) - def _combine_match_columns(self, other, func, level=None, try_cast=True): + def _combine_match_columns(self, other, func, level=None): assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=1, level=level, copy=False) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3fcedc9c3007f..25a1689eed254 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1732,7 +1732,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): def _combine_series_frame(self, other, func, fill_value=None, axis=None, - level=None, try_cast=True): + level=None): """ Apply binary operator `func` to self, other using alignment and fill conventions determined by the fill_value, axis, level, and try_cast kwargs. @@ -1745,7 +1745,6 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, fill_value : object, default None axis : {0, 1, 'columns', 'index', None}, default None level : int or None, default None - try_cast : bool, default True Returns ------- @@ -1760,8 +1759,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, if axis == 0: return self._combine_match_index(other, func, level=level) else: - return self._combine_match_columns(other, func, level=level, - try_cast=try_cast) + return self._combine_match_columns(other, func, level=level) else: if not len(other): return self * np.nan @@ -1772,8 +1770,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, columns=self.columns) # default axis is columns - return self._combine_match_columns(other, func, level=level, - try_cast=try_cast) + return self._combine_match_columns(other, func, level=level) def _align_method_FRAME(left, right, axis): @@ -1878,7 +1875,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): pass_op = op if axis in [0, "columns", None] else na_op return _combine_series_frame(self, other, pass_op, fill_value=fill_value, axis=axis, - level=level, try_cast=True) + level=level) else: if fill_value is not None: self = self.fillna(fill_value) @@ -1920,7 +1917,7 @@ def f(self, other, axis=default_axis, level=None): elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, fill_value=None, axis=axis, - level=level, try_cast=False) + level=level) else: return self._combine_const(other, na_op, try_cast=False) @@ -1945,7 +1942,7 @@ def f(self, other): elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, fill_value=None, axis=None, - level=None, try_cast=False) + level=None) else: # straight boolean comparisons we want to allow all columns diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 58e3001bcfe6a..f52632fdbacec 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -537,12 +537,12 @@ def xs(self, key, axis=0, copy=False): # Arithmetic-related methods def _combine_frame(self, other, func, fill_value=None, level=None): - this, other = self.align(other, join='outer', level=level, copy=False) - new_index, new_columns = this.index, this.columns - if level is not None: raise NotImplementedError("'level' argument is not supported") + this, other = self.align(other, join='outer', level=level, copy=False) + new_index, new_columns = this.index, this.columns + if self.empty and other.empty: return self._constructor(index=new_index).__finalize__(self) @@ -585,13 +585,8 @@ def _combine_match_index(self, other, func, level=None): if level is not None: raise NotImplementedError("'level' argument is not supported") - new_index = self.index.union(other.index) - this = self - if self.index is not new_index: - this = self.reindex(new_index) - - if other.index is not new_index: - other = other.reindex(new_index) + this, other = self.align(other, join='outer', axis=0, level=level, + copy=False) for col, series in compat.iteritems(this): new_data[col] = func(series.values, other.values) @@ -604,10 +599,10 @@ def _combine_match_index(self, other, func, level=None): np.float64(other.fill_value)) return self._constructor( - new_data, index=new_index, columns=self.columns, + new_data, index=this.index, columns=self.columns, default_fill_value=fill_value).__finalize__(self) - def _combine_match_columns(self, other, func, level=None, try_cast=True): + def _combine_match_columns(self, other, func, level=None): # patched version of DataFrame._combine_match_columns to account for # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series, # where 3.0 is numpy.float64 and series is a SparseSeries. Still From 30f37374e2bf41bd3723d6153f44999e1cb3a262 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Oct 2018 17:59:28 -0700 Subject: [PATCH 3/9] Use align methods in SparseDataFrame methods to move towards sharing code --- pandas/core/sparse/frame.py | 69 +++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index f52632fdbacec..0c299853e2869 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -562,17 +562,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): if col in this and col in other: new_data[col] = func(this[col], other[col]) - # if the fill values are the same use them? or use a valid one - new_fill_value = None - other_fill_value = getattr(other, 'default_fill_value', np.nan) - if self.default_fill_value == other_fill_value: - new_fill_value = self.default_fill_value - elif np.isnan(self.default_fill_value) and not np.isnan( - other_fill_value): - new_fill_value = other_fill_value - elif not np.isnan(self.default_fill_value) and np.isnan( - other_fill_value): - new_fill_value = self.default_fill_value + new_fill_value = self._get_op_result_fill_value(other, func) return self._constructor(data=new_data, index=new_index, columns=new_columns, @@ -591,12 +581,7 @@ def _combine_match_index(self, other, func, level=None): for col, series in compat.iteritems(this): new_data[col] = func(series.values, other.values) - # fill_value is a function of our operator - if isna(other.fill_value) or isna(self.default_fill_value): - fill_value = np.nan - else: - fill_value = func(np.float64(self.default_fill_value), - np.float64(other.fill_value)) + fill_value = self._get_op_result_fill_value(other, func) return self._constructor( new_data, index=this.index, columns=self.columns, @@ -611,24 +596,56 @@ def _combine_match_columns(self, other, func, level=None): if level is not None: raise NotImplementedError("'level' argument is not supported") - new_data = {} - - union = intersection = self.columns + left, right = self.align(other, join='outer', axis=1, level=level, + copy=False) + assert left.columns.equals(right.index) - if not union.equals(other.index): - union = other.index.union(self.columns) - intersection = other.index.intersection(self.columns) + new_data = {} - for col in intersection: - new_data[col] = func(self[col], float(other[col])) + for col in left.columns: + new_data[col] = func(left[col], float(right[col])) return self._constructor( - new_data, index=self.index, columns=union, + new_data, index=left.index, columns=left.columns, default_fill_value=self.default_fill_value).__finalize__(self) def _combine_const(self, other, func, errors='raise', try_cast=True): return self._apply_columns(lambda x: func(x, other)) + def _get_op_result_fill_value(self, other, func): + own_default = self.default_fill_value + + if isinstance(other, DataFrame): + # i.e. called from _combine_frame + + other_default = getattr(other, 'default_fill_value', np.nan) + + # if the fill values are the same use them? or use a valid one + if own_default == other_default: + # TOOD: won't this evaluate as False if both are np.nan? + fill_value = own_default + elif np.isnan(own_default) and not np.isnan(other_default): + fill_value = other_fill_value + elif not np.isnan(own_default) and np.isnan(other_default): + fill_value = own_default + else: + fill_value = None + + elif isinstance(other, SparseSeries): + # i.e. called from _combine_match_index + + # fill_value is a function of our operator + if isna(other.fill_value) or isna(own_default): + fill_value = np.nan + else: + fill_value = func(np.float64(own_default), + np.float64(other.fill_value)) + + else: + raise NotImplementedError(type(other)) + + return fill_value + def _reindex_index(self, index, method, copy, level, fill_value=np.nan, limit=None, takeable=False): if level is not None: From 5f9d11127678466b8e9fbaf3f8ea191539535e39 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 9 Oct 2018 08:19:20 -0700 Subject: [PATCH 4/9] typo fixup --- pandas/tests/arithmetic/test_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 0449212713048..e9316221b125b 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -156,7 +156,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box): if box is not pd.Index and broken: # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') raise pytest.xfail("timedelta64 not converted to nanos; " - "Tick division not imlpemented") + "Tick division not implemented") expected = TimedeltaIndex(['3 Days', '36 Hours']) From f23666350fe1dc3700235b8ffb27493ded678d98 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 9 Oct 2018 12:20:08 -0700 Subject: [PATCH 5/9] fixup copy/paste mistake --- pandas/core/sparse/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 0c299853e2869..64b57c977be07 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -625,7 +625,7 @@ def _get_op_result_fill_value(self, other, func): # TOOD: won't this evaluate as False if both are np.nan? fill_value = own_default elif np.isnan(own_default) and not np.isnan(other_default): - fill_value = other_fill_value + fill_value = other_default elif not np.isnan(own_default) and np.isnan(other_default): fill_value = own_default else: From 1c9b86bc6616f50e7b825c4db9fcd9877bdd3e13 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 9 Oct 2018 17:55:57 -0700 Subject: [PATCH 6/9] keep collecting arithmetic tests --- pandas/tests/frame/test_arithmetic.py | 330 ++++++++++++++++++++++--- pandas/tests/frame/test_operators.py | 259 +------------------ pandas/tests/series/test_arithmetic.py | 58 +++++ pandas/tests/series/test_operators.py | 26 -- pandas/tests/series/test_period.py | 17 -- pandas/tests/series/test_timeseries.py | 10 - 6 files changed, 359 insertions(+), 341 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d0eb7cd35b268..ed196f92fa84b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +from collections import deque +from datetime import datetime import operator import pytest @@ -16,28 +18,86 @@ # Comparisons class TestFrameComparisons(object): - def test_flex_comparison_nat(self): - # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, - # and _definitely_ not be NaN - df = pd.DataFrame([pd.NaT]) - - result = df == pd.NaT - # result.iloc[0, 0] is a np.bool_ object - assert result.iloc[0, 0].item() is False - - result = df.eq(pd.NaT) - assert result.iloc[0, 0].item() is False - - result = df != pd.NaT - assert result.iloc[0, 0].item() is True - - result = df.ne(pd.NaT) - assert result.iloc[0, 0].item() is True + # Specifically _not_ flex-comparisons + + def test_comparison_invalid(self): + + def check(df, df2): + + for (x, y) in [(df, df2), (df2, df)]: + # we expect the result to match Series comparisons for + # == and !=, inequalities should raise + result = x == y + expected = pd.DataFrame({col: x[col] == y[col] + for col in x.columns}, + index=x.index, columns=x.columns) + tm.assert_frame_equal(result, expected) + + result = x != y + expected = pd.DataFrame({col: x[col] != y[col] + for col in x.columns}, + index=x.index, columns=x.columns) + tm.assert_frame_equal(result, expected) + + with pytest.raises(TypeError): + x >= y + with pytest.raises(TypeError): + x > y + with pytest.raises(TypeError): + x < y + with pytest.raises(TypeError): + x <= y + + # GH4968 + # invalid date/int comparisons + df = pd.DataFrame(np.random.randint(10, size=(10, 1)), columns=['a']) + df['dates'] = pd.date_range('20010101', periods=len(df)) + + df2 = df.copy() + df2['dates'] = df['a'] + check(df, df2) + + df = pd.DataFrame(np.random.randint(10, size=(10, 2)), + columns=['a', 'b']) + df2 = pd.DataFrame({'a': pd.date_range('20010101', periods=len(df)), + 'b': pd.date_range('20100101', periods=len(df))}) + check(df, df2) + + def test_timestamp_compare(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH#4982 + df = pd. DataFrame({'dates1': pd.date_range('20010101', periods=10), + 'dates2': pd.date_range('20010102', periods=10), + 'intcol': np.random.randint(1000000000, size=10), + 'floatcol': np.random.randn(10), + 'stringcol': list(tm.rands(10))}) + df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + if left in ['eq', 'ne']: + expected = left_f(df, pd.Timestamp('20010109')) + result = right_f(pd.Timestamp('20010109'), df) + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(TypeError): + left_f(df, pd.Timestamp('20010109')) + with pytest.raises(TypeError): + right_f(pd.Timestamp('20010109'), df) + # nats + expected = left_f(df, pd.Timestamp('nat')) + result = right_f(pd.Timestamp('nat'), df) + tm.assert_frame_equal(result, expected) def test_mixed_comparison(self): - # GH 13128, GH 22163 != datetime64 vs non-dt64 should be False, + # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, # not raise TypeError - # (this appears to be fixed before #22163, not sure when) + # (this appears to be fixed before GH#22163, not sure when) df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]]) other = pd.DataFrame([['a', 'b'], ['c', 'd']]) @@ -47,17 +107,6 @@ def test_mixed_comparison(self): result = df != other assert result.all().all() - def test_df_boolean_comparison_error(self): - # GH 4576 - # boolean comparisons with a tuple/list give unexpected results - df = pd.DataFrame(np.arange(6).reshape((3, 2))) - - # not shape compatible - with pytest.raises(ValueError): - df == (2, 2) - with pytest.raises(ValueError): - df == [2, 2] - def test_df_float_none_comparison(self): df = pd.DataFrame(np.random.randn(8, 3), index=range(8), columns=['A', 'B', 'C']) @@ -75,6 +124,148 @@ def test_df_string_comparison(self): tm.assert_frame_equal(df[mask_b], df.loc[0:0, :]) tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :]) + def test_df_boolean_comparison_error(self): + # GH#4576 + # boolean comparisons with a tuple/list give unexpected results + df = pd.DataFrame(np.arange(6).reshape((3, 2))) + + # not shape compatible + with pytest.raises(ValueError): + df == (2, 2) + with pytest.raises(ValueError): + df == [2, 2] + + +class TestFrameFlexComparisons(object): + # TODO: test_bool_flex_frame needs a better name + def test_bool_flex_frame(self): + data = np.random.randn(5, 3) + other_data = np.random.randn(5, 3) + df = pd.DataFrame(data) + other = pd.DataFrame(other_data) + ndim_5 = np.ones(df.shape + (1, 3)) + + # Unaligned + def _check_unaligned_frame(meth, op, df, other): + part_o = other.loc[3:, 1:].copy() + rs = meth(part_o) + xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) + tm.assert_frame_equal(rs, xp) + + # DataFrame + assert df.eq(df).values.all() + assert not df.ne(df).values.any() + for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: + f = getattr(df, op) + o = getattr(operator, op) + # No NAs + tm.assert_frame_equal(f(other), o(df, other)) + _check_unaligned_frame(f, o, df, other) + # ndarray + tm.assert_frame_equal(f(other.values), o(df, other.values)) + # scalar + tm.assert_frame_equal(f(0), o(df, 0)) + # NAs + msg = "Unable to coerce to Series/DataFrame" + tm.assert_frame_equal(f(np.nan), o(df, np.nan)) + with tm.assert_raises_regex(ValueError, msg): + f(ndim_5) + + # Series + def _test_seq(df, idx_ser, col_ser): + idx_eq = df.eq(idx_ser, axis=0) + col_eq = df.eq(col_ser) + idx_ne = df.ne(idx_ser, axis=0) + col_ne = df.ne(col_ser) + tm.assert_frame_equal(col_eq, df == pd.Series(col_ser)) + tm.assert_frame_equal(col_eq, -col_ne) + tm.assert_frame_equal(idx_eq, -idx_ne) + tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) + tm.assert_frame_equal(col_eq, df.eq(list(col_ser))) + tm.assert_frame_equal(idx_eq, df.eq(pd.Series(idx_ser), axis=0)) + tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) + + idx_gt = df.gt(idx_ser, axis=0) + col_gt = df.gt(col_ser) + idx_le = df.le(idx_ser, axis=0) + col_le = df.le(col_ser) + + tm.assert_frame_equal(col_gt, df > pd.Series(col_ser)) + tm.assert_frame_equal(col_gt, -col_le) + tm.assert_frame_equal(idx_gt, -idx_le) + tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) + + idx_ge = df.ge(idx_ser, axis=0) + col_ge = df.ge(col_ser) + idx_lt = df.lt(idx_ser, axis=0) + col_lt = df.lt(col_ser) + tm.assert_frame_equal(col_ge, df >= pd.Series(col_ser)) + tm.assert_frame_equal(col_ge, -col_lt) + tm.assert_frame_equal(idx_ge, -idx_lt) + tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) + + idx_ser = pd.Series(np.random.randn(5)) + col_ser = pd.Series(np.random.randn(3)) + _test_seq(df, idx_ser, col_ser) + + # list/tuple + _test_seq(df, idx_ser.values, col_ser.values) + + # NA + df.loc[0, 0] = np.nan + rs = df.eq(df) + assert not rs.loc[0, 0] + rs = df.ne(df) + assert rs.loc[0, 0] + rs = df.gt(df) + assert not rs.loc[0, 0] + rs = df.lt(df) + assert not rs.loc[0, 0] + rs = df.ge(df) + assert not rs.loc[0, 0] + rs = df.le(df) + assert not rs.loc[0, 0] + + # complex + arr = np.array([np.nan, 1, 6, np.nan]) + arr2 = np.array([2j, np.nan, 7, None]) + df = pd.DataFrame({'a': arr}) + df2 = pd.DataFrame({'a': arr2}) + rs = df.gt(df2) + assert not rs.values.any() + rs = df.ne(df2) + assert rs.values.all() + + arr3 = np.array([2j, np.nan, None]) + df3 = pd.DataFrame({'a': arr3}) + rs = df3.gt(2j) + assert not rs.values.any() + + # corner, dtype=object + df1 = pd.DataFrame({'col': ['foo', np.nan, 'bar']}) + df2 = pd.DataFrame({'col': ['foo', datetime.now(), 'bar']}) + result = df1.ne(df2) + exp = pd.DataFrame({'col': [False, True, False]}) + tm.assert_frame_equal(result, exp) + + def test_flex_comparison_nat(self): + # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # and _definitely_ not be NaN + df = pd.DataFrame([pd.NaT]) + + result = df == pd.NaT + # result.iloc[0, 0] is a np.bool_ object + assert result.iloc[0, 0].item() is False + + result = df.eq(pd.NaT) + assert result.iloc[0, 0].item() is False + + result = df != pd.NaT + assert result.iloc[0, 0].item() is True + + result = df.ne(pd.NaT) + assert result.iloc[0, 0].item() is True + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types(self, opname): # GH 15077, non-empty DataFrame @@ -375,3 +566,82 @@ def test_td64_df_add_int_frame(self): df - other with pytest.raises(TypeError): other - df + + def test_arith_mixed(self): + + left = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [1, 2, 3]}) + + result = left + left + expected = pd.DataFrame({'A': ['aa', 'bb', 'cc'], + 'B': [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + def test_arith_getitem_commute(self): + df = pd.DataFrame({'A': [1.1, 3.3], 'B': [2.5, -3.9]}) + + def _test_op(df, op): + result = op(df, 1) + + if not df.columns.is_unique: + raise ValueError("Only unique columns supported by this test") + + for col in result.columns: + tm.assert_series_equal(result[col], op(df[col], 1)) + + _test_op(df, operator.add) + _test_op(df, operator.sub) + _test_op(df, operator.mul) + _test_op(df, operator.truediv) + _test_op(df, operator.floordiv) + _test_op(df, operator.pow) + + _test_op(df, lambda x, y: y + x) + _test_op(df, lambda x, y: y - x) + _test_op(df, lambda x, y: y * x) + _test_op(df, lambda x, y: y / x) + _test_op(df, lambda x, y: y ** x) + + _test_op(df, lambda x, y: x + y) + _test_op(df, lambda x, y: x - y) + _test_op(df, lambda x, y: x * y) + _test_op(df, lambda x, y: x / y) + _test_op(df, lambda x, y: x ** y) + + @pytest.mark.parametrize('values', [[1, 2], (1, 2), np.array([1, 2]), + range(1, 3), deque([1, 2])]) + def test_arith_alignment_non_pandas_object(self, values): + # GH#17901 + df = pd.DataFrame({'A': [1, 1], 'B': [1, 1]}) + expected = pd.DataFrame({'A': [2, 2], 'B': [3, 3]}) + result = df + values + tm.assert_frame_equal(result, expected) + + def test_arith_non_pandas_object(self): + df = pd.DataFrame(np.arange(1, 10, dtype='f8').reshape(3, 3), + columns=['one', 'two', 'three'], + index=['a', 'b', 'c']) + + val1 = df.xs('a').values + added = pd.DataFrame(df.values + val1, + index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val1, added) + + added = pd.DataFrame((df.values.T + val1).T, + index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val1, axis=0), added) + + val2 = list(df['two']) + + added = pd.DataFrame(df.values + val2, + index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val2, added) + + added = pd.DataFrame((df.values.T + val2).T, index=df.index, + columns=df.columns) + tm.assert_frame_equal(df.add(val2, axis='index'), added) + + val3 = np.random.rand(*df.shape) + added = pd.DataFrame(df.values + val3, + index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val3), added) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 433b0f09e13bc..cabf26fa02586 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from collections import deque -from datetime import datetime from decimal import Decimal import operator @@ -13,8 +11,7 @@ from pandas.compat import range from pandas import compat -from pandas import (DataFrame, Series, MultiIndex, Timestamp, - date_range) +from pandas import DataFrame, Series, MultiIndex import pandas.core.common as com import pandas as pd @@ -243,75 +240,6 @@ def test_operators_none_as_na(self, op): result = op(df.fillna(7), df) assert_frame_equal(result, expected, check_dtype=False) - def test_comparison_invalid(self): - - def check(df, df2): - - for (x, y) in [(df, df2), (df2, df)]: - # we expect the result to match Series comparisons for - # == and !=, inequalities should raise - result = x == y - expected = DataFrame({col: x[col] == y[col] - for col in x.columns}, - index=x.index, columns=x.columns) - assert_frame_equal(result, expected) - - result = x != y - expected = DataFrame({col: x[col] != y[col] - for col in x.columns}, - index=x.index, columns=x.columns) - assert_frame_equal(result, expected) - - pytest.raises(TypeError, lambda: x >= y) - pytest.raises(TypeError, lambda: x > y) - pytest.raises(TypeError, lambda: x < y) - pytest.raises(TypeError, lambda: x <= y) - - # GH4968 - # invalid date/int comparisons - df = DataFrame(np.random.randint(10, size=(10, 1)), columns=['a']) - df['dates'] = date_range('20010101', periods=len(df)) - - df2 = df.copy() - df2['dates'] = df['a'] - check(df, df2) - - df = DataFrame(np.random.randint(10, size=(10, 2)), columns=['a', 'b']) - df2 = DataFrame({'a': date_range('20010101', periods=len( - df)), 'b': date_range('20100101', periods=len(df))}) - check(df, df2) - - def test_timestamp_compare(self): - # make sure we can compare Timestamps on the right AND left hand side - # GH4982 - df = DataFrame({'dates1': date_range('20010101', periods=10), - 'dates2': date_range('20010102', periods=10), - 'intcol': np.random.randint(1000000000, size=10), - 'floatcol': np.random.randn(10), - 'stringcol': list(tm.rands(10))}) - df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT - ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', - 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - # no nats - if left in ['eq', 'ne']: - expected = left_f(df, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), df) - assert_frame_equal(result, expected) - else: - with pytest.raises(TypeError): - left_f(df, Timestamp('20010109')) - with pytest.raises(TypeError): - right_f(Timestamp('20010109'), df) - # nats - expected = left_f(df, Timestamp('nat')) - result = right_f(Timestamp('nat'), df) - assert_frame_equal(result, expected) - @pytest.mark.parametrize('op,res', [('__eq__', False), ('__ne__', True)]) # TODO: not sure what's correct here. @@ -385,158 +313,6 @@ def test_binary_ops_align(self): for res in [res3, res4, res5, res6]: assert_frame_equal(res, exp) - def test_arith_mixed(self): - - left = DataFrame({'A': ['a', 'b', 'c'], - 'B': [1, 2, 3]}) - - result = left + left - expected = DataFrame({'A': ['aa', 'bb', 'cc'], - 'B': [2, 4, 6]}) - assert_frame_equal(result, expected) - - def test_arith_getitem_commute(self): - df = DataFrame({'A': [1.1, 3.3], 'B': [2.5, -3.9]}) - - self._test_op(df, operator.add) - self._test_op(df, operator.sub) - self._test_op(df, operator.mul) - self._test_op(df, operator.truediv) - self._test_op(df, operator.floordiv) - self._test_op(df, operator.pow) - - self._test_op(df, lambda x, y: y + x) - self._test_op(df, lambda x, y: y - x) - self._test_op(df, lambda x, y: y * x) - self._test_op(df, lambda x, y: y / x) - self._test_op(df, lambda x, y: y ** x) - - self._test_op(df, lambda x, y: x + y) - self._test_op(df, lambda x, y: x - y) - self._test_op(df, lambda x, y: x * y) - self._test_op(df, lambda x, y: x / y) - self._test_op(df, lambda x, y: x ** y) - - @staticmethod - def _test_op(df, op): - result = op(df, 1) - - if not df.columns.is_unique: - raise ValueError("Only unique columns supported by this test") - - for col in result.columns: - assert_series_equal(result[col], op(df[col], 1)) - - def test_bool_flex_frame(self): - data = np.random.randn(5, 3) - other_data = np.random.randn(5, 3) - df = DataFrame(data) - other = DataFrame(other_data) - ndim_5 = np.ones(df.shape + (1, 3)) - - # Unaligned - def _check_unaligned_frame(meth, op, df, other): - part_o = other.loc[3:, 1:].copy() - rs = meth(part_o) - xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) - assert_frame_equal(rs, xp) - - # DataFrame - assert df.eq(df).values.all() - assert not df.ne(df).values.any() - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - f = getattr(df, op) - o = getattr(operator, op) - # No NAs - assert_frame_equal(f(other), o(df, other)) - _check_unaligned_frame(f, o, df, other) - # ndarray - assert_frame_equal(f(other.values), o(df, other.values)) - # scalar - assert_frame_equal(f(0), o(df, 0)) - # NAs - msg = "Unable to coerce to Series/DataFrame" - assert_frame_equal(f(np.nan), o(df, np.nan)) - with tm.assert_raises_regex(ValueError, msg): - f(ndim_5) - - # Series - def _test_seq(df, idx_ser, col_ser): - idx_eq = df.eq(idx_ser, axis=0) - col_eq = df.eq(col_ser) - idx_ne = df.ne(idx_ser, axis=0) - col_ne = df.ne(col_ser) - assert_frame_equal(col_eq, df == Series(col_ser)) - assert_frame_equal(col_eq, -col_ne) - assert_frame_equal(idx_eq, -idx_ne) - assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) - assert_frame_equal(col_eq, df.eq(list(col_ser))) - assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0)) - assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) - - idx_gt = df.gt(idx_ser, axis=0) - col_gt = df.gt(col_ser) - idx_le = df.le(idx_ser, axis=0) - col_le = df.le(col_ser) - - assert_frame_equal(col_gt, df > Series(col_ser)) - assert_frame_equal(col_gt, -col_le) - assert_frame_equal(idx_gt, -idx_le) - assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) - - idx_ge = df.ge(idx_ser, axis=0) - col_ge = df.ge(col_ser) - idx_lt = df.lt(idx_ser, axis=0) - col_lt = df.lt(col_ser) - assert_frame_equal(col_ge, df >= Series(col_ser)) - assert_frame_equal(col_ge, -col_lt) - assert_frame_equal(idx_ge, -idx_lt) - assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) - - idx_ser = Series(np.random.randn(5)) - col_ser = Series(np.random.randn(3)) - _test_seq(df, idx_ser, col_ser) - - # list/tuple - _test_seq(df, idx_ser.values, col_ser.values) - - # NA - df.loc[0, 0] = np.nan - rs = df.eq(df) - assert not rs.loc[0, 0] - rs = df.ne(df) - assert rs.loc[0, 0] - rs = df.gt(df) - assert not rs.loc[0, 0] - rs = df.lt(df) - assert not rs.loc[0, 0] - rs = df.ge(df) - assert not rs.loc[0, 0] - rs = df.le(df) - assert not rs.loc[0, 0] - - # complex - arr = np.array([np.nan, 1, 6, np.nan]) - arr2 = np.array([2j, np.nan, 7, None]) - df = DataFrame({'a': arr}) - df2 = DataFrame({'a': arr2}) - rs = df.gt(df2) - assert not rs.values.any() - rs = df.ne(df2) - assert rs.values.all() - - arr3 = np.array([2j, np.nan, None]) - df3 = DataFrame({'a': arr3}) - rs = df3.gt(2j) - assert not rs.values.any() - - # corner, dtype=object - df1 = DataFrame({'col': ['foo', np.nan, 'bar']}) - df2 = DataFrame({'col': ['foo', datetime.now(), 'bar']}) - result = df1.ne(df2) - exp = DataFrame({'col': [False, True, False]}) - assert_frame_equal(result, exp) - def test_dti_tz_convert_to_utc(self): base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='UTC') @@ -548,39 +324,6 @@ def test_dti_tz_convert_to_utc(self): exp = DataFrame({'A': [np.nan, 3, np.nan]}, index=base) assert_frame_equal(df1 + df2, exp) - def test_arith_non_pandas_object(self): - df = self.simple - - val1 = df.xs('a').values - added = DataFrame(df.values + val1, index=df.index, columns=df.columns) - assert_frame_equal(df + val1, added) - - added = DataFrame((df.values.T + val1).T, - index=df.index, columns=df.columns) - assert_frame_equal(df.add(val1, axis=0), added) - - val2 = list(df['two']) - - added = DataFrame(df.values + val2, index=df.index, columns=df.columns) - assert_frame_equal(df + val2, added) - - added = DataFrame((df.values.T + val2).T, index=df.index, - columns=df.columns) - assert_frame_equal(df.add(val2, axis='index'), added) - - val3 = np.random.rand(*df.shape) - added = DataFrame(df.values + val3, index=df.index, columns=df.columns) - assert_frame_equal(df.add(val3), added) - - @pytest.mark.parametrize('values', [[1, 2], (1, 2), np.array([1, 2]), - range(1, 3), deque([1, 2])]) - def test_arith_alignment_non_pandas_object(self, values): - # GH 17901 - df = DataFrame({'A': [1, 1], 'B': [1, 1]}) - expected = DataFrame({'A': [2, 2], 'B': [3, 3]}) - result = df + values - assert_frame_equal(result, expected) - def test_combineFrame(self): frame_copy = self.frame.reindex(self.frame.index[::2]) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 37ba1c91368b3..0748ed6066656 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -2,13 +2,71 @@ import operator import pytest +import numpy as np from pandas import Series +from pandas.core.indexes.period import IncompatibleFrequency import pandas as pd import pandas.util.testing as tm +class TestSeriesArithmetic(object): + # Some of these may end up in tests/arithmetic, but are not yet sorted + + def test_empty_series_add_sub(self): + # GH#13844 + a = Series(dtype='M8[ns]') + b = Series(dtype='m8[ns]') + tm.assert_series_equal(a, a + b) + tm.assert_series_equal(a, a - b) + tm.assert_series_equal(a, b + a) + with pytest.raises(TypeError): + b - a + + def test_add_series_with_period_index(self): + rng = pd.period_range('1/1/2000', '1/1/2010', freq='A') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected[1::2] = np.nan + tm.assert_series_equal(result, expected) + + result = ts + _permute(ts[::2]) + tm.assert_series_equal(result, expected) + + msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" + with tm.assert_raises_regex(IncompatibleFrequency, msg): + ts + ts.asfreq('D', how="end") + + def test_operators_datetimelike(self): + + # ## timedelta64 ### + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # ## datetime64 ### + dt1 = Series([pd.Timestamp('20111230'), pd.Timestamp('20120101'), + pd.Timestamp('20120103')]) + dt1.iloc[2] = np.nan + dt2 = Series([Timestamp('20111231'), pd.Timestamp('20120102'), + pd.Timestamp('20120104')]) + dt1 - dt2 + dt2 - dt1 + + # ## datetime64 with timetimedelta ### + dt1 + td1 + td1 + dt1 + dt1 - td1 + # TODO: Decide if this ought to work. + # td1 - dt1 + + # ## timetimedelta with datetime64 ### + td1 + dt1 + dt1 + td1 + + # ------------------------------------------------------------------ # Comparisons diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 55e3dfde3ceb7..132b833be95f6 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -753,32 +753,6 @@ def check(get_ser, test_ser): if op_str not in ['__add__', '__radd__', '__sub__', '__rsub__']: check(dt2, td2) - def test_operators_datetimelike(self): - - # ## timedelta64 ### - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - # ## datetime64 ### - dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), - Timestamp('20120103')]) - dt1.iloc[2] = np.nan - dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), - Timestamp('20120104')]) - dt1 - dt2 - dt2 - dt1 - - # ## datetime64 with timetimedelta ### - dt1 + td1 - td1 + dt1 - dt1 - td1 - # TODO: Decide if this ought to work. - # td1 - dt1 - - # ## timetimedelta with datetime64 ### - td1 + dt1 - dt1 + td1 - class TestSeriesOperators(TestData): @pytest.mark.parametrize( diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 24c2f30bef569..0cce92d2d5d6f 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -3,7 +3,6 @@ import pandas as pd import pandas.util.testing as tm -import pandas.core.indexes.period as period from pandas import Series, period_range, DataFrame, Period @@ -119,22 +118,6 @@ def test_intercept_astype_object(self): result = df.values.squeeze() assert (result[:, 0] == expected.values).all() - def test_add_series(self): - rng = period_range('1/1/2000', '1/1/2010', freq='A') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts + ts[::2] - expected = ts + ts - expected[1::2] = np.nan - tm.assert_series_equal(result, expected) - - result = ts + _permute(ts[::2]) - tm.assert_series_equal(result, expected) - - msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): - ts + ts.asfreq('D', how="end") - def test_align_series(self, join_type): rng = period_range('1/1/2000', '1/1/2010', freq='A') ts = Series(np.random.randn(len(rng)), index=rng) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 72492de4b1247..aa4c862cf1051 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -456,16 +456,6 @@ def test_timeseries_coercion(self): assert ser.index.is_all_dates assert isinstance(ser.index, DatetimeIndex) - def test_empty_series_ops(self): - # see issue #13844 - a = Series(dtype='M8[ns]') - b = Series(dtype='m8[ns]') - assert_series_equal(a, a + b) - assert_series_equal(a, a - b) - assert_series_equal(a, b + a) - with pytest.raises(TypeError): - b - a - def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') From a2d1a56cb29d14a1b747c14f2e203830990458af Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 9 Oct 2018 18:04:20 -0700 Subject: [PATCH 7/9] keep collecting Series arith tests --- pandas/tests/arithmetic/test_datetime64.py | 45 ++++++++ pandas/tests/series/test_arithmetic.py | 88 ++++++++++++++- pandas/tests/series/test_operators.py | 125 +-------------------- pandas/tests/series/test_period.py | 4 - 4 files changed, 132 insertions(+), 130 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 36bb0aca066fb..75b1c26b9c323 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -674,6 +674,51 @@ def test_dt64ser_sub_datetime_dtype(self): # TODO: This next block of tests came from tests.series.test_operators, # needs to be de-duplicated and parametrized over `box` classes + def test_operators_datetimelike_invalid(self, all_arithmetic_operators): + # these are all TypeEror ops + op_str = all_arithmetic_operators + + def check(get_ser, test_ser): + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + op = getattr(get_ser, op_str, None) + with tm.assert_raises_regex(TypeError, 'operate|cannot'): + op(test_ser) + + # ## timedelta64 ### + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # ## datetime64 ### + dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), + Timestamp('20120103')]) + dt1.iloc[2] = np.nan + dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), + Timestamp('20120104')]) + if op_str not in ['__sub__', '__rsub__']: + check(dt1, dt2) + + # ## datetime64 with timetimedelta ### + # TODO(jreback) __rsub__ should raise? + if op_str not in ['__add__', '__radd__', '__sub__']: + check(dt1, td1) + + # 8260, 10763 + # datetime64 with tz + tz = 'US/Eastern' + dt1 = Series(date_range('2000-01-01 09:00:00', periods=5, + tz=tz), name='foo') + dt2 = dt1.copy() + dt2.iloc[2] = np.nan + td1 = Series(pd.timedelta_range('1 days 1 min', periods=5, freq='H')) + td2 = td1.copy() + td2.iloc[1] = np.nan + + if op_str not in ['__add__', '__radd__', '__sub__', '__rsub__']: + check(dt2, td2) + @pytest.mark.parametrize('klass', [Series, pd.Index]) def test_sub_datetime64_not_ns(self, klass): # GH#7996 diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 0748ed6066656..182f3826d521c 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,16 +1,62 @@ # -*- coding: utf-8 -*- +from datetime import timedelta import operator import pytest import numpy as np -from pandas import Series +from pandas import compat, Series from pandas.core.indexes.period import IncompatibleFrequency import pandas as pd import pandas.util.testing as tm +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestSeriesFlexArithmetic(object): + @pytest.mark.parametrize( + 'ts', + [ + (lambda x: x, lambda x: x * 2, False), + (lambda x: x, lambda x: x[::2], False), + (lambda x: x, lambda x: 5, True), + (lambda x: tm.makeFloatSeries(), + lambda x: tm.makeFloatSeries(), + True) + ]) + @pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv', + 'truediv', 'div', 'pow']) + def test_flex_method_equivalence(self, opname, ts): + # check that Series.{opname} behaves like Series.__{opname}__, + tser = tm.makeTimeSeries().rename('ts') + + series = ts[0](tser) + other = ts[1](tser) + check_reverse = ts[2] + + if opname == 'div' and compat.PY3: + pytest.skip('div test only for Py3') + + op = getattr(Series, opname) + + if op == 'div': + alt = operator.truediv + else: + alt = getattr(operator, opname) + + result = op(series, other) + expected = alt(series, other) + tm.assert_almost_equal(result, expected) + if check_reverse: + rop = getattr(Series, "r" + opname) + result = rop(series, other) + expected = alt(other, series) + tm.assert_almost_equal(result, expected) + + class TestSeriesArithmetic(object): # Some of these may end up in tests/arithmetic, but are not yet sorted @@ -50,7 +96,7 @@ def test_operators_datetimelike(self): dt1 = Series([pd.Timestamp('20111230'), pd.Timestamp('20120101'), pd.Timestamp('20120103')]) dt1.iloc[2] = np.nan - dt2 = Series([Timestamp('20111231'), pd.Timestamp('20120102'), + dt2 = Series([pd.Timestamp('20111231'), pd.Timestamp('20120102'), pd.Timestamp('20120104')]) dt1 - dt2 dt2 - dt1 @@ -70,7 +116,45 @@ def test_operators_datetimelike(self): # ------------------------------------------------------------------ # Comparisons +class TestSeriesFlexComparison(object): + def test_comparison_flex_basic(self): + left = pd.Series(np.random.randn(10)) + right = pd.Series(np.random.randn(10)) + + tm.assert_series_equal(left.eq(right), left == right) + tm.assert_series_equal(left.ne(right), left != right) + tm.assert_series_equal(left.le(right), left < right) + tm.assert_series_equal(left.lt(right), left <= right) + tm.assert_series_equal(left.gt(right), left > right) + tm.assert_series_equal(left.ge(right), left >= right) + + # axis + for axis in [0, None, 'index']: + tm.assert_series_equal(left.eq(right, axis=axis), left == right) + tm.assert_series_equal(left.ne(right, axis=axis), left != right) + tm.assert_series_equal(left.le(right, axis=axis), left < right) + tm.assert_series_equal(left.lt(right, axis=axis), left <= right) + tm.assert_series_equal(left.gt(right, axis=axis), left > right) + tm.assert_series_equal(left.ge(right, axis=axis), left >= right) + + # + msg = 'No axis named 1 for object type' + for op in ['eq', 'ne', 'le', 'le', 'gt', 'ge']: + with tm.assert_raises_regex(ValueError, msg): + getattr(left, op)(right, axis=1) + + class TestSeriesComparison(object): + def test_comparison_different_length(self): + a = Series(['a', 'b', 'c']) + b = Series(['b', 'a']) + with pytest.raises(ValueError): + a < b + + a = Series([1, 2]) + b = Series([2, 3, 4]) + with pytest.raises(ValueError): + a == b @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_ser_flex_cmp_return_dtypes(self, opname): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 132b833be95f6..805e20c0ae182 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -11,8 +11,7 @@ import pandas as pd from pandas import (Index, Series, DataFrame, isna, bdate_range, - NaT, date_range, timedelta_range, Categorical) -from pandas.core.indexes.datetimes import Timestamp + NaT, date_range, Categorical) import pandas.core.nanops as nanops from pandas.core import ops @@ -586,17 +585,6 @@ def test_nat_comparisons(self, dtype, box, reverse, pair): expected = Series([False, False, True]) assert_series_equal(left <= right, expected) - def test_comparison_different_length(self): - a = Series(['a', 'b', 'c']) - b = Series(['b', 'a']) - with pytest.raises(ValueError): - a < b - - a = Series([1, 2]) - b = Series([2, 3, 4]) - with pytest.raises(ValueError): - a == b - def test_ne(self): ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float) expected = [True, True, False, True, True] @@ -635,31 +623,6 @@ def test_comp_ops_df_compat(self): class TestSeriesFlexComparisonOps(object): - def test_comparison_flex_basic(self): - left = pd.Series(np.random.randn(10)) - right = pd.Series(np.random.randn(10)) - - assert_series_equal(left.eq(right), left == right) - assert_series_equal(left.ne(right), left != right) - assert_series_equal(left.le(right), left < right) - assert_series_equal(left.lt(right), left <= right) - assert_series_equal(left.gt(right), left > right) - assert_series_equal(left.ge(right), left >= right) - - # axis - for axis in [0, None, 'index']: - assert_series_equal(left.eq(right, axis=axis), left == right) - assert_series_equal(left.ne(right, axis=axis), left != right) - assert_series_equal(left.le(right, axis=axis), left < right) - assert_series_equal(left.lt(right, axis=axis), left <= right) - assert_series_equal(left.gt(right, axis=axis), left > right) - assert_series_equal(left.ge(right, axis=axis), left >= right) - - # - msg = 'No axis named 1 for object type' - for op in ['eq', 'ne', 'le', 'le', 'gt', 'ge']: - with tm.assert_raises_regex(ValueError, msg): - getattr(left, op)(right, axis=1) def test_comparison_flex_alignment(self): left = Series([1, 3, 2], index=list('abc')) @@ -706,93 +669,7 @@ def test_comparison_flex_alignment_fill(self): assert_series_equal(left.gt(right, fill_value=0), exp) -class TestDatetimeSeriesArithmetic(object): - - def test_operators_datetimelike_invalid(self, all_arithmetic_operators): - # these are all TypeEror ops - op_str = all_arithmetic_operators - - def check(get_ser, test_ser): - - # check that we are getting a TypeError - # with 'operate' (from core/ops.py) for the ops that are not - # defined - op = getattr(get_ser, op_str, None) - with tm.assert_raises_regex(TypeError, 'operate|cannot'): - op(test_ser) - - # ## timedelta64 ### - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - # ## datetime64 ### - dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), - Timestamp('20120103')]) - dt1.iloc[2] = np.nan - dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), - Timestamp('20120104')]) - if op_str not in ['__sub__', '__rsub__']: - check(dt1, dt2) - - # ## datetime64 with timetimedelta ### - # TODO(jreback) __rsub__ should raise? - if op_str not in ['__add__', '__radd__', '__sub__']: - check(dt1, td1) - - # 8260, 10763 - # datetime64 with tz - tz = 'US/Eastern' - dt1 = Series(date_range('2000-01-01 09:00:00', periods=5, - tz=tz), name='foo') - dt2 = dt1.copy() - dt2.iloc[2] = np.nan - td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H')) - td2 = td1.copy() - td2.iloc[1] = np.nan - - if op_str not in ['__add__', '__radd__', '__sub__', '__rsub__']: - check(dt2, td2) - - class TestSeriesOperators(TestData): - @pytest.mark.parametrize( - 'ts', - [ - (lambda x: x, lambda x: x * 2, False), - (lambda x: x, lambda x: x[::2], False), - (lambda x: x, lambda x: 5, True), - (lambda x: tm.makeFloatSeries(), - lambda x: tm.makeFloatSeries(), - True) - ]) - @pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv', - 'truediv', 'div', 'pow']) - def test_op_method(self, opname, ts): - # check that Series.{opname} behaves like Series.__{opname}__, - tser = tm.makeTimeSeries().rename('ts') - - series = ts[0](tser) - other = ts[1](tser) - check_reverse = ts[2] - - if opname == 'div' and compat.PY3: - pytest.skip('div test only for Py3') - - op = getattr(Series, opname) - - if op == 'div': - alt = operator.truediv - else: - alt = getattr(operator, opname) - - result = op(series, other) - expected = alt(series, other) - assert_almost_equal(result, expected) - if check_reverse: - rop = getattr(Series, "r" + opname) - result = rop(series, other) - expected = alt(other, series) - assert_almost_equal(result, expected) def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 0cce92d2d5d6f..0fc9a13b24b3e 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -6,10 +6,6 @@ from pandas import Series, period_range, DataFrame, Period -def _permute(obj): - return obj.take(np.random.permutation(len(obj))) - - class TestSeriesPeriod(object): def setup_method(self, method): From ecaac457e0dd0ece18e6ffa0a5313e82a769e725 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 23 Oct 2018 07:59:41 -0700 Subject: [PATCH 8/9] fixup duplicate import --- pandas/tests/series/test_arithmetic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 9cc22cb08b1a8..8f02d234936f7 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -4,7 +4,6 @@ import numpy as np import pytest -import numpy as np from pandas import compat, Series from pandas.core.indexes.period import IncompatibleFrequency From c43137380768f09b64e4de19bab5d03f98479e34 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 27 Oct 2018 22:18:09 -0700 Subject: [PATCH 9/9] post-merge cleanup --- pandas/tests/series/test_arithmetic.py | 4 ++-- pandas/tests/series/test_operators.py | 3 +-- pandas/tests/series/test_period.py | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index d5200aea3dba2..e781488a799ec 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- -from datetime import timedelta import operator +from datetime import timedelta import numpy as np import pytest import pandas as pd import pandas.util.testing as tm +from pandas import Series, compat from pandas.core.indexes.period import IncompatibleFrequency -from pandas import compat, Series def _permute(obj): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index d35bdf96ef94b..082ed5e0f5123 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -13,11 +13,10 @@ import pandas.util.testing as tm from pandas import ( Categorical, DataFrame, Index, NaT, Series, bdate_range, compat, - date_range, isna, timedelta_range + date_range, isna ) from pandas.compat import range from pandas.core import ops -from pandas.core.indexes.datetimes import Timestamp from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal ) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 3df8afe0d6ceb..88a5ff261fbb4 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -2,7 +2,6 @@ import pytest import pandas as pd - import pandas.util.testing as tm from pandas import DataFrame, Period, Series, period_range from pandas.core.arrays import PeriodArray