-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG/CLN: Decouple Series/DataFrame.transform #35964
Changes from all commits
7b6ab94
04c1238
052df6e
7b13811
133bfaa
a5d4a19
25c4457
8454d91
c37ef68
9eee0cb
cf4f80b
69e6807
f66a806
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,9 +18,10 @@ | |
Union, | ||
) | ||
|
||
from pandas._typing import AggFuncType, FrameOrSeries, Label | ||
from pandas._typing import AggFuncType, Axis, FrameOrSeries, Label | ||
|
||
from pandas.core.dtypes.common import is_dict_like, is_list_like | ||
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries | ||
|
||
from pandas.core.base import SpecificationError | ||
import pandas.core.common as com | ||
|
@@ -384,3 +385,98 @@ def validate_func_kwargs( | |
if not columns: | ||
raise TypeError(no_arg_message) | ||
return columns, func | ||
|
||
|
||
def transform(
    obj: FrameOrSeries, func: AggFuncType, axis: Axis, *args, **kwargs,
) -> FrameOrSeries:
    """
    Transform a DataFrame or Series.

    Parameters
    ----------
    obj : DataFrame or Series
        Object to compute the transform on.
    func : string, function, list-like, or dict-like
        Function(s) to compute the transform with.
    axis : {0 or 'index', 1 or 'columns'}
        Axis along which the function is applied:

        * 0 or 'index': apply function to each column.
        * 1 or 'columns': apply function to each row.
    *args
        Positional arguments forwarded to ``func``.
    **kwargs
        Keyword arguments forwarded to ``func``.

    Returns
    -------
    DataFrame or Series
        Result of applying ``func`` along the given axis of the
        Series or DataFrame.

    Raises
    ------
    ValueError
        If the transform function fails or does not transform.
    SpecificationError
        If a dict-like ``func`` refers to columns that are not in ``obj``
        or contains a nested dict-like.
    """
    from pandas.core.reshape.concat import concat

    is_series = obj.ndim == 1

    if obj._get_axis_number(axis) == 1:
        assert not is_series
        # Row-wise transform is a column-wise transform of the transpose.
        return transform(obj.T, func, 0, *args, **kwargs).T

    if is_list_like(func) and not is_dict_like(func):
        # Materialize once so one-shot iterables can be reused for every column.
        funcs = list(func)
        # Convert to the equivalent dict-like specification.
        if is_series:
            func = {com.get_callable_name(v) or v: v for v in funcs}
        else:
            func = {col: funcs for col in obj}

    if is_dict_like(func):
        if not is_series:
            cols = sorted(set(func.keys()) - set(obj.columns))
            if cols:
                raise SpecificationError(f"Column(s) {cols} do not exist")

        # Iterate over items() rather than values(): a dict-like Series
        # exposes ``values`` as an attribute, not a method.
        if any(is_dict_like(how) for _, how in func.items()):
            # GH 15931 - deprecation of renaming keys
            raise SpecificationError("nested renamer is not supported")

        # Maps each key of ``func`` to the transformed column.
        results = {}
        for name, how in func.items():
            colg = obj._gotitem(name, ndim=1)
            try:
                results[name] = transform(colg, how, 0, *args, **kwargs)
            except Exception as err:
                # Columns whose transform fails are dropped on purpose; only
                # a function that does not transform is reported to the caller.
                if str(err) == "Function did not transform":
                    raise

        # combine results
        if not results:
            raise ValueError("Transform function failed")
        return concat(results, axis=1)

    # func is either str or callable
    try:
        if isinstance(func, str):
            result = obj._try_aggregate_string_function(func, *args, **kwargs)
        else:
            f = obj._get_cython_func(func)
            if f and not args and not kwargs:
                result = getattr(obj, f)()
            else:
                try:
                    result = obj.apply(func, args=args, **kwargs)
                except Exception:
                    # Fall back to calling func on the whole object.
                    result = func(obj, *args, **kwargs)
    except Exception as err:
        raise ValueError("Transform function failed") from err

    # Functions that transform may return empty Series/DataFrame
    # when the dtype is not appropriate. An empty result is only a
    # failure when the input itself was not empty.
    if (
        isinstance(result, (ABCSeries, ABCDataFrame))
        and result.empty
        and not obj.empty
    ):
        raise ValueError("Transform function failed")
    if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
        obj.index
    ):
        raise ValueError("Function did not transform")

    return result
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move to reshaping section (generally we want almost nothing in Other)