REF: collect ops dispatch functions in one place, try to de-duplicate SparseDataFrame methods #23060

Merged Oct 28, 2018 · 17 commits
Changes from 15 commits
pandas/core/ops.py: 255 changes (128 additions, 127 deletions)
@@ -943,6 +943,134 @@ def should_series_dispatch(left, right, op):
return False


def dispatch_to_series(left, right, func, str_rep=None, axis=None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.

Parameters
----------
left : DataFrame
right : scalar or DataFrame
func : arithmetic or comparison operator
str_rep : str or None, default None
axis : {None, 0, 1, "index", "columns"}

Returns
-------
DataFrame
"""
# Note: we use iloc to access columns for compat with cases
# with non-unique columns.
import pandas.core.computation.expressions as expressions
Contributor:
can this be imported at the top?

Member (Author):
I'm not 100% sure, but I think this is a run-time import to make import pandas as pd faster.

right = lib.item_from_zerodim(right)
if lib.is_scalar(right):

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries) and axis == "columns":
# We only get here if called via left._combine_match_columns,
# in which case we specifically want to operate row-by-row
assert right.index.equals(left.columns)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries):
assert right.index.equals(left.index) # Handle other cases later

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)

result = left._constructor(new_data, index=left.index, copy=False)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
result.columns = left.columns
return result
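
As a rough illustration of how this helper is exercised, here is a hypothetical snippet (not part of the diff), assuming dispatch_to_series is importable from pandas.core.ops in the source tree this PR targets:

import operator
import pandas as pd
from pandas.core.ops import dispatch_to_series

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
# Scalar right-hand side: each column is combined with the scalar via the
# Series implementation, equivalent to df + 10.
result = dispatch_to_series(df, 10, operator.add)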


def dispatch_to_index_op(op, left, right, index_class):
"""
Wrap Series left in the given index_class to delegate the operation op
to the index implementation. DatetimeIndex and TimedeltaIndex perform
type checking, timezone handling, overflow checks, etc.

Parameters
----------
op : binary operator (operator.add, operator.sub, ...)
left : Series
right : object
index_class : DatetimeIndex or TimedeltaIndex

Returns
-------
result : object, usually DatetimeIndex, TimedeltaIndex, or Series
"""
left_idx = index_class(left)

# avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes,
# left_idx may inherit a freq from a cached DatetimeIndex.
# See discussion in GH#19147.
if getattr(left_idx, 'freq', None) is not None:
left_idx = left_idx._shallow_copy(freq=None)
try:
result = op(left_idx, right)
except NullFrequencyError:
# DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
# on add/sub of integers (or int-like). We re-raise as a TypeError.
raise TypeError('incompatible type for a datetime/timedelta '
'operation [{name}]'.format(name=op.__name__))
return result
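
A hypothetical usage sketch (not part of the diff) for a timedelta Series; in ops.py the surrounding Series wrappers would normally re-box the returned index with the original index and name:

import operator
import pandas as pd
from pandas.core.ops import dispatch_to_index_op

ser = pd.Series(pd.to_timedelta(["1 day", "2 days"]))
# The Series is wrapped in a TimedeltaIndex so the index class handles the
# type checking and overflow logic; the result here is a TimedeltaIndex.
out = dispatch_to_index_op(operator.sub, ser, pd.Timedelta("12 hours"),
                           pd.TimedeltaIndex)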


def dispatch_to_extension_op(op, left, right):
"""
Assume that left or right is a Series backed by an ExtensionArray,
apply the operator defined by op.
"""

# The op calls will raise TypeError if the op is not defined
# on the ExtensionArray

# unbox Series and Index to arrays
if isinstance(left, (ABCSeries, ABCIndexClass)):
new_left = left._values
else:
new_left = left

if isinstance(right, (ABCSeries, ABCIndexClass)):
new_right = right._values
else:
new_right = right

res_values = op(new_left, new_right)
res_name = get_op_result_name(left, right)

if op.__name__ in ['divmod', 'rdivmod']:
return _construct_divmod_result(
left, res_values, left.index, res_name)

return _construct_result(left, res_values, left.index, res_name)
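
A hypothetical sketch of the ExtensionArray path (not part of the diff), assuming the nullable Int64 extension dtype available in this era of pandas:

import operator
import pandas as pd
from pandas.core.ops import dispatch_to_extension_op

left = pd.Series([1, 2, None], dtype="Int64")
right = pd.Series([10, 20, 30], dtype="Int64")
# Both operands are unboxed to their ExtensionArrays, the op is applied,
# and the result is re-boxed into a Series with the left operand's index.
out = dispatch_to_extension_op(operator.add, left, right)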


# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
# methods
@@ -1202,36 +1330,6 @@ def _construct_divmod_result(left, result, index, name, dtype=None):
)


def dispatch_to_extension_op(op, left, right):
"""
Assume that left or right is a Series backed by an ExtensionArray,
apply the operator defined by op.
"""

# The op calls will raise TypeError if the op is not defined
# on the ExtensionArray

# unbox Series and Index to arrays
if isinstance(left, (ABCSeries, ABCIndexClass)):
new_left = left._values
else:
new_left = left

if isinstance(right, (ABCSeries, ABCIndexClass)):
new_right = right._values
else:
new_right = right

res_values = op(new_left, new_right)
res_name = get_op_result_name(left, right)

if op.__name__ in ['divmod', 'rdivmod']:
return _construct_divmod_result(
left, res_values, left.index, res_name)

return _construct_result(left, res_values, left.index, res_name)


def _arith_method_SERIES(cls, op, special):
"""
Wrapper function for Series arithmetic operations, to avoid
@@ -1329,40 +1427,6 @@ def wrapper(left, right):
return wrapper


def dispatch_to_index_op(op, left, right, index_class):
"""
Wrap Series left in the given index_class to delegate the operation op
to the index implementation. DatetimeIndex and TimedeltaIndex perform
type checking, timezone handling, overflow checks, etc.

Parameters
----------
op : binary operator (operator.add, operator.sub, ...)
left : Series
right : object
index_class : DatetimeIndex or TimedeltaIndex

Returns
-------
result : object, usually DatetimeIndex, TimedeltaIndex, or Series
"""
left_idx = index_class(left)

# avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes,
# left_idx may inherit a freq from a cached DatetimeIndex.
# See discussion in GH#19147.
if getattr(left_idx, 'freq', None) is not None:
left_idx = left_idx._shallow_copy(freq=None)
try:
result = op(left_idx, right)
except NullFrequencyError:
# DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
# on add/sub of integers (or int-like). We re-raise as a TypeError.
raise TypeError('incompatible type for a datetime/timedelta '
'operation [{name}]'.format(name=op.__name__))
return result


def _comp_method_OBJECT_ARRAY(op, x, y):
if isinstance(y, list):
y = construct_1d_object_array_from_listlike(y)
@@ -1661,69 +1725,6 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# -----------------------------------------------------------------------------
# DataFrame

def dispatch_to_series(left, right, func, str_rep=None, axis=None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.

Parameters
----------
left : DataFrame
right : scalar or DataFrame
func : arithmetic or comparison operator
str_rep : str or None, default None
axis : {None, 0, 1, "index", "columns"}

Returns
-------
DataFrame
"""
# Note: we use iloc to access columns for compat with cases
# with non-unique columns.
import pandas.core.computation.expressions as expressions

right = lib.item_from_zerodim(right)
if lib.is_scalar(right):

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries) and axis == "columns":
# We only get here if called via left._combine_match_columns,
# in which case we specifically want to operate row-by-row
assert right.index.equals(left.columns)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries):
assert right.index.equals(left.index) # Handle other cases later

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)

result = left._constructor(new_data, index=left.index, copy=False)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
result.columns = left.columns
return result


def _combine_series_frame(self, other, func, fill_value=None, axis=None,
level=None):