forked from jbesomi/texthero
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement dealing with np.nan, closes jbesomi#86
Every function in the library now handles NaNs correctly. This is implemented through the decorator @handle_nans in the new file _helper.py. Tests were added in test_nan.py. As we went through the whole library anyway, the argument "input" was renamed to "s" in some functions to be in line with the others. Co-authored-by: Maximilian Krahn <maximilian.krahn@icloud.com>
- Loading branch information
1 parent
7fdc168
commit ce196d4
Showing
8 changed files
with
182 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
""" | ||
Useful helper functions for the texthero library. | ||
""" | ||
|
||
import functools | ||
import wrapt | ||
import numpy as np | ||
|
||
|
||
""" | ||
Decorators. | ||
""" | ||
|
||
|
||
def handle_nans(wrapped=None, input_only=False):
    """
    Decorator to make a function not change NaN values.

    The decorated function is called on the input Series with all
    NaN cells removed. Unless ``input_only`` is True, the first
    output of the function is then written back into a copy of the
    original Series, so cells that were NaN stay NaN.

    The function must take a Series as its first parameter (passed
    either positionally or by keyword) and, unless ``input_only`` is
    True, return a Series as its first (or only) output.

    Parameters
    ----------
    wrapped : callable, optional
        The function to decorate. Left as None when the decorator is
        used with arguments, e.g. ``@handle_nans(input_only=True)``.

    input_only : bool, default to False
        Set to True when the output that is returned by the
        function is _not_ the same as the input Series
        with (some) cells changed (e.g. in top_words,
        the output Series is different from the input
        Series, and in pca there is no return, so in both
        cases input_only is set to True). The output of the
        function is then returned untouched.

    Returns
    -------
    callable
        The decorated function, or (when ``wrapped`` is None) a
        partial of this decorator waiting for the function.

    Examples
    --------
    >>> from texthero._helper import *
    >>> import pandas as pd
    >>> import numpy as np
    >>> @handle_nans
    ... def replace_a_with_b(s):
    ...     return s.str.replace("a", "b")
    >>> s_with_nan = pd.Series(["Test a", np.nan])
    >>> replace_a_with_b(s_with_nan)
    0    Test b
    1       NaN
    dtype: object
    """
    # Local import: only needed to resolve a Series passed by keyword.
    import inspect

    if wrapped is None:
        # Used as @handle_nans(...): return a decorator that still
        # expects the function as its first argument.
        return functools.partial(handle_nans, input_only=input_only)

    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        # Locate the Series: first positional argument, or the keyword
        # argument named after the function's first parameter.
        if args:
            series_key = None
            s = args[0]
        else:
            series_key = next(iter(inspect.signature(wrapped).parameters), None)
            if series_key is None or series_key not in kwargs:
                # No Series supplied: let the wrapped function raise
                # its own, more informative, TypeError.
                return wrapped(*args, **kwargs)
            s = kwargs[series_key]

        # True for the cells the function is allowed to touch.
        not_nan = ~s.isna()

        # Need a copy, as writing into s[not_nan] would change the
        # original input.
        s_result = s.copy()
        s_without_nans = s[not_nan]

        # Execute the function on the non-NaN cells only, passing the
        # Series the same way (positional / keyword) the caller did.
        if series_key is None:
            output = wrapped(s_without_nans, *args[1:], **kwargs)
        else:
            output = wrapped(**{**kwargs, series_key: s_without_nans})

        if input_only:
            return output

        # The Series is the first (or only) element of the output.
        if not isinstance(output, tuple):
            output = (output,)
        series_out, rest = output[0], output[1:]

        # Refill: non-NaN cells take the new values, NaN cells are kept.
        s_result[not_nan] = series_out

        # Recover index name if set.
        if series_out.index.name:
            s_result.index.name = series_out.index.name

        return (s_result,) + rest if rest else s_result

    return wrapper(wrapped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.