2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -4971,7 +4971,7 @@ def _combine_match_index(self, other, func, level=None):
index=left.index, columns=self.columns,
copy=False)

def _combine_match_columns(self, other, func, level=None, try_cast=True):
def _combine_match_columns(self, other, func, level=None):
assert isinstance(other, Series)
left, right = self.align(other, join='outer', axis=1, level=level,
copy=False)
270 changes: 134 additions & 136 deletions pandas/core/ops.py
@@ -943,6 +943,134 @@ def should_series_dispatch(left, right, op):
return False


def dispatch_to_series(left, right, func, str_rep=None, axis=None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.

Parameters
----------
left : DataFrame
right : scalar or DataFrame
func : arithmetic or comparison operator
str_rep : str or None, default None
axis : {None, 0, 1, "index", "columns"}

Returns
-------
DataFrame
"""
# Note: we use iloc to access columns for compat with cases
# with non-unique columns.
import pandas.core.computation.expressions as expressions
Contributor:

can this be imported at the top?

Member Author:

I'm not 100% sure, but I think this is a run-time import to make import pandas as pd faster


right = lib.item_from_zerodim(right)
if lib.is_scalar(right):

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries) and axis == "columns":
# We only get here if called via left._combine_match_columns,
# in which case we specifically want to operate row-by-row
assert right.index.equals(left.columns)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries):
assert right.index.equals(left.index) # Handle other cases later

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)

result = left._constructor(new_data, index=left.index, copy=False)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
result.columns = left.columns
return result
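
A minimal usage sketch of the scalar branch above (illustrative only, not part of the diff; the frame df and the use of operator.add are hypothetical):

# Illustrative example, not part of the PR.
import operator
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# Hand-rolled equivalent of column_op for a scalar right operand:
new_data = {i: operator.add(df.iloc[:, i], 10) for i in range(len(df.columns))}
result = pd.DataFrame(new_data, index=df.index, copy=False)
result.columns = df.columns  # pin the original labels; robust to duplicate columns
# result matches df + 10, computed column-by-column.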


def dispatch_to_index_op(op, left, right, index_class):
"""
Wrap Series left in the given index_class to delegate the operation op
to the index implementation. DatetimeIndex and TimedeltaIndex perform
type checking, timezone handling, overflow checks, etc.

Parameters
----------
op : binary operator (operator.add, operator.sub, ...)
left : Series
right : object
index_class : DatetimeIndex or TimedeltaIndex

Returns
-------
result : object, usually DatetimeIndex, TimedeltaIndex, or Series
"""
left_idx = index_class(left)

# avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes,
# left_idx may inherit a freq from a cached DatetimeIndex.
# See discussion in GH#19147.
if getattr(left_idx, 'freq', None) is not None:
left_idx = left_idx._shallow_copy(freq=None)
try:
result = op(left_idx, right)
except NullFrequencyError:
# DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
# on add/sub of integers (or int-like). We re-raise as a TypeError.
raise TypeError('incompatible type for a datetime/timedelta '
'operation [{name}]'.format(name=op.__name__))
return result
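
A sketch of what this delegation amounts to for a datetime64 Series (illustrative; ser and delta are made-up names, and re-wrapping the result in a Series is left to the caller):

# Illustrative example, not part of the PR.
import operator
import pandas as pd

ser = pd.Series(pd.date_range("2018-01-01", periods=3))
delta = pd.Timedelta(days=1)

# Roughly what dispatch_to_index_op(operator.add, ser, delta, DatetimeIndex) does:
left_idx = pd.DatetimeIndex(ser)            # the index class handles dtype/tz/overflow checks
result_values = operator.add(left_idx, delta)
result = pd.Series(result_values, index=ser.index)  # caller re-wraps into a Series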


def dispatch_to_extension_op(op, left, right):
"""
Assume that left or right is a Series backed by an ExtensionArray,
apply the operator defined by op.
"""

# The op calls will raise TypeError if the op is not defined
# on the ExtensionArray

# unbox Series and Index to arrays
if isinstance(left, (ABCSeries, ABCIndexClass)):
new_left = left._values
else:
new_left = left

if isinstance(right, (ABCSeries, ABCIndexClass)):
new_right = right._values
else:
new_right = right

res_values = op(new_left, new_right)
res_name = get_op_result_name(left, right)

if op.__name__ in ['divmod', 'rdivmod']:
return _construct_divmod_result(
left, res_values, left.index, res_name)

return _construct_result(left, res_values, left.index, res_name)
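
A rough illustration of the unbox-and-apply pattern (assuming a pandas version that ships the nullable Int64 extension dtype; s1 and s2 are hypothetical):

# Illustrative example, not part of the PR.
import operator
import pandas as pd

s1 = pd.Series([1, 2, None], dtype="Int64")   # backed by an ExtensionArray
s2 = pd.Series([10, 20, 30], dtype="Int64")

# Unbox to the underlying arrays and let the ExtensionArray's own operator
# implementation produce the result values, as dispatch_to_extension_op does:
res_values = operator.add(s1._values, s2._values)
result = pd.Series(res_values, index=s1.index)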


# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
# methods
@@ -1202,36 +1330,6 @@ def _construct_divmod_result(left, result, index, name, dtype=None):
)


def dispatch_to_extension_op(op, left, right):
"""
Assume that left or right is a Series backed by an ExtensionArray,
apply the operator defined by op.
"""

# The op calls will raise TypeError if the op is not defined
# on the ExtensionArray

# unbox Series and Index to arrays
if isinstance(left, (ABCSeries, ABCIndexClass)):
new_left = left._values
else:
new_left = left

if isinstance(right, (ABCSeries, ABCIndexClass)):
new_right = right._values
else:
new_right = right

res_values = op(new_left, new_right)
res_name = get_op_result_name(left, right)

if op.__name__ in ['divmod', 'rdivmod']:
return _construct_divmod_result(
left, res_values, left.index, res_name)

return _construct_result(left, res_values, left.index, res_name)


def _arith_method_SERIES(cls, op, special):
"""
Wrapper function for Series arithmetic operations, to avoid
@@ -1329,40 +1427,6 @@ def wrapper(left, right):
return wrapper


def dispatch_to_index_op(op, left, right, index_class):
"""
Wrap Series left in the given index_class to delegate the operation op
to the index implementation. DatetimeIndex and TimedeltaIndex perform
type checking, timezone handling, overflow checks, etc.

Parameters
----------
op : binary operator (operator.add, operator.sub, ...)
left : Series
right : object
index_class : DatetimeIndex or TimedeltaIndex

Returns
-------
result : object, usually DatetimeIndex, TimedeltaIndex, or Series
"""
left_idx = index_class(left)

# avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes,
# left_idx may inherit a freq from a cached DatetimeIndex.
# See discussion in GH#19147.
if getattr(left_idx, 'freq', None) is not None:
left_idx = left_idx._shallow_copy(freq=None)
try:
result = op(left_idx, right)
except NullFrequencyError:
# DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
# on add/sub of integers (or int-like). We re-raise as a TypeError.
raise TypeError('incompatible type for a datetime/timedelta '
'operation [{name}]'.format(name=op.__name__))
return result


def _comp_method_OBJECT_ARRAY(op, x, y):
if isinstance(y, list):
y = construct_1d_object_array_from_listlike(y)
@@ -1661,72 +1725,9 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# -----------------------------------------------------------------------------
# DataFrame

def dispatch_to_series(left, right, func, str_rep=None, axis=None):
"""
Evaluate the frame operation func(left, right) by evaluating
column-by-column, dispatching to the Series implementation.

Parameters
----------
left : DataFrame
right : scalar or DataFrame
func : arithmetic or comparison operator
str_rep : str or None, default None
axis : {None, 0, 1, "index", "columns"}

Returns
-------
DataFrame
"""
# Note: we use iloc to access columns for compat with cases
# with non-unique columns.
import pandas.core.computation.expressions as expressions

right = lib.item_from_zerodim(right)
if lib.is_scalar(right):

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

elif isinstance(right, ABCDataFrame):
assert right._indexed_same(left)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries) and axis == "columns":
# We only get here if called via left._combine_match_columns,
# in which case we specifically want to operate row-by-row
assert right.index.equals(left.columns)

def column_op(a, b):
return {i: func(a.iloc[:, i], b.iloc[i])
for i in range(len(a.columns))}

elif isinstance(right, ABCSeries):
assert right.index.equals(left.index) # Handle other cases later

def column_op(a, b):
return {i: func(a.iloc[:, i], b)
for i in range(len(a.columns))}

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)

result = left._constructor(new_data, index=left.index, copy=False)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
result.columns = left.columns
return result


def _combine_series_frame(self, other, func, fill_value=None, axis=None,
level=None, try_cast=True):
level=None):
"""
Apply binary operator `func` to self, other using alignment and fill
conventions determined by the fill_value, axis, level, and try_cast kwargs.
@@ -1739,7 +1740,6 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None,
fill_value : object, default None
axis : {0, 1, 'columns', 'index', None}, default None
level : int or None, default None
try_cast : bool, default True

Returns
-------
Expand All @@ -1754,8 +1754,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None,
if axis == 0:
return self._combine_match_index(other, func, level=level)
else:
return self._combine_match_columns(other, func, level=level,
try_cast=try_cast)
return self._combine_match_columns(other, func, level=level)
else:
if not len(other):
return self * np.nan
@@ -1766,8 +1765,7 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None,
columns=self.columns)

# default axis is columns
return self._combine_match_columns(other, func, level=level,
try_cast=try_cast)
return self._combine_match_columns(other, func, level=level)
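
To make the axis convention concrete, a small sketch of how a Series operand is matched against a frame (hypothetical data; df.add is used only because DataFrame flex arithmetic routes Series operands through this helper):

# Illustrative example, not part of the PR.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=["x", "y"])
row_like = pd.Series({"a": 10, "b": 20})    # labels match df.columns
col_like = pd.Series({"x": 100, "y": 200})  # labels match df.index

df.add(row_like, axis="columns")  # default axis: the _combine_match_columns path
df.add(col_like, axis="index")    # axis=0/"index": the _combine_match_index path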


def _align_method_FRAME(left, right, axis):
@@ -1867,7 +1865,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
pass_op = op if axis in [0, "columns", None] else na_op
return _combine_series_frame(self, other, pass_op,
fill_value=fill_value, axis=axis,
level=level, try_cast=True)
level=level)
else:
if fill_value is not None:
self = self.fillna(fill_value)
@@ -1909,7 +1907,7 @@ def f(self, other, axis=default_axis, level=None):
elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, na_op,
fill_value=None, axis=axis,
level=level, try_cast=False)
level=level)
else:
return self._combine_const(other, na_op, try_cast=False)

@@ -1937,7 +1935,7 @@ def f(self, other):
elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, func,
fill_value=None, axis=None,
level=None, try_cast=False)
level=None)
else:

# straight boolean comparisons we want to allow all columns