diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7729b2d6aa4cc..d16249724127f 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -240,8 +240,8 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then - MSG='Validate docstrings (GL06, GL07, GL09, SS04, PR03, PR05, EX04, SS05)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,PR03,PR05,EX04, SS05 + MSG='Validate docstrings (GL06, GL07, GL09, SS04, PR03, PR05, EX04, RT04, SS05)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,PR03,PR05,EX04,RT04,SS05 RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index cba21ce7ee1e6..a047ad46e4887 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -20,9 +20,7 @@ including other versions of pandas. Fixed Regressions ^^^^^^^^^^^^^^^^^ -- -- -- +- Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`) .. _whatsnew_0242.enhancements: @@ -51,7 +49,7 @@ Bug Fixes **I/O** -- +- Bug in reading a HDF5 table-format ``DataFrame`` created in Python 2, in Python 3 (:issue:`24925`) - - diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b473a7aef929e..02f33c49c79ae 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -289,9 +289,9 @@ def unique(values): Returns ------- unique values. - - If the input is an Index, the return is an Index - - If the input is a Categorical dtype, the return is a Categorical - - If the input is a Series/ndarray, the return will be an ndarray + If the input is an Index, the return is an Index + If the input is a Categorical dtype, the return is a Categorical + If the input is a Series/ndarray, the return will be an ndarray See Also -------- diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index abf401890a714..4be7eb8ddb890 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -139,7 +139,8 @@ def is_object_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array-like or dtype is of the object dtype. + boolean + Whether or not the array-like or dtype is of the object dtype. Examples -------- @@ -230,8 +231,8 @@ def is_scipy_sparse(arr): Returns ------- - boolean : Whether or not the array-like is a - scipy.sparse.spmatrix instance. + boolean + Whether or not the array-like is a scipy.sparse.spmatrix instance. Notes ----- @@ -270,7 +271,8 @@ def is_categorical(arr): Returns ------- - boolean : Whether or not the array-like is of a Categorical instance. + boolean + Whether or not the array-like is of a Categorical instance. Examples -------- @@ -305,8 +307,9 @@ def is_datetimetz(arr): Returns ------- - boolean : Whether or not the array-like is a datetime array-like with - a timezone component in its dtype. + boolean + Whether or not the array-like is a datetime array-like with a + timezone component in its dtype. Examples -------- @@ -347,7 +350,8 @@ def is_offsetlike(arr_or_obj): Returns ------- - boolean : Whether the object is a DateOffset or listlike of DatetOffsets + boolean + Whether the object is a DateOffset or listlike of DatetOffsets Examples -------- @@ -381,7 +385,8 @@ def is_period(arr): Returns ------- - boolean : Whether or not the array-like is a periodical index. + boolean + Whether or not the array-like is a periodical index. Examples -------- @@ -411,8 +416,8 @@ def is_datetime64_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array-like or dtype is of - the datetime64 dtype. + boolean + Whether or not the array-like or dtype is of the datetime64 dtype. Examples -------- @@ -442,8 +447,8 @@ def is_datetime64tz_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array-like or dtype is of - a DatetimeTZDtype dtype. + boolean + Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. Examples -------- @@ -480,8 +485,8 @@ def is_timedelta64_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array-like or dtype is - of the timedelta64 dtype. + boolean + Whether or not the array-like or dtype is of the timedelta64 dtype. Examples -------- @@ -511,7 +516,8 @@ def is_period_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array-like or dtype is of the Period dtype. + boolean + Whether or not the array-like or dtype is of the Period dtype. Examples -------- @@ -544,8 +550,8 @@ def is_interval_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array-like or dtype is - of the Interval dtype. + boolean + Whether or not the array-like or dtype is of the Interval dtype. Examples -------- @@ -580,8 +586,8 @@ def is_categorical_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array-like or dtype is - of the Categorical dtype. + boolean + Whether or not the array-like or dtype is of the Categorical dtype. Examples -------- @@ -613,7 +619,8 @@ def is_string_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of the string dtype. + boolean + Whether or not the array or dtype is of the string dtype. Examples -------- @@ -647,8 +654,9 @@ def is_period_arraylike(arr): Returns ------- - boolean : Whether or not the array-like is a periodical - array-like or PeriodIndex instance. + boolean + Whether or not the array-like is a periodical array-like or + PeriodIndex instance. Examples -------- @@ -678,8 +686,9 @@ def is_datetime_arraylike(arr): Returns ------- - boolean : Whether or not the array-like is a datetime - array-like or DatetimeIndex. + boolean + Whether or not the array-like is a datetime array-like or + DatetimeIndex. Examples -------- @@ -713,7 +722,8 @@ def is_datetimelike(arr): Returns ------- - boolean : Whether or not the array-like is a datetime-like array-like. + boolean + Whether or not the array-like is a datetime-like array-like. Examples -------- @@ -754,7 +764,8 @@ def is_dtype_equal(source, target): Returns ---------- - boolean : Whether or not the two dtypes are equal. + boolean + Whether or not the two dtypes are equal. Examples -------- @@ -794,7 +805,8 @@ def is_dtype_union_equal(source, target): Returns ---------- - boolean : Whether or not the two dtypes are equal. + boolean + Whether or not the two dtypes are equal. >>> is_dtype_equal("int", int) True @@ -835,7 +847,8 @@ def is_any_int_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of an integer dtype. + boolean + Whether or not the array or dtype is of an integer dtype. Examples -------- @@ -883,8 +896,9 @@ def is_integer_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of an integer dtype - and not an instance of timedelta64. + boolean + Whether or not the array or dtype is of an integer dtype and + not an instance of timedelta64. Examples -------- @@ -938,8 +952,9 @@ def is_signed_integer_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of a signed integer dtype - and not an instance of timedelta64. + boolean + Whether or not the array or dtype is of a signed integer dtype + and not an instance of timedelta64. Examples -------- @@ -993,8 +1008,8 @@ def is_unsigned_integer_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of an - unsigned integer dtype. + boolean + Whether or not the array or dtype is of an unsigned integer dtype. Examples -------- @@ -1036,7 +1051,8 @@ def is_int64_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of the int64 dtype. + boolean + Whether or not the array or dtype is of the int64 dtype. Notes ----- @@ -1086,7 +1102,8 @@ def is_datetime64_any_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of the datetime64 dtype. + boolean + Whether or not the array or dtype is of the datetime64 dtype. Examples -------- @@ -1126,7 +1143,8 @@ def is_datetime64_ns_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of the datetime64[ns] dtype. + boolean + Whether or not the array or dtype is of the datetime64[ns] dtype. Examples -------- @@ -1178,8 +1196,8 @@ def is_timedelta64_ns_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of the - timedelta64[ns] dtype. + boolean + Whether or not the array or dtype is of the timedelta64[ns] dtype. Examples -------- @@ -1207,8 +1225,9 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of a - timedelta64, or datetime64 dtype. + boolean + Whether or not the array or dtype is of a timedelta64, + or datetime64 dtype. Examples -------- @@ -1248,7 +1267,8 @@ def _is_unorderable_exception(e): Returns ------- - boolean : Whether or not the exception raised is an unorderable exception. + boolean + Whether or not the exception raised is an unorderable exception. """ if PY36: @@ -1275,8 +1295,8 @@ def is_numeric_v_string_like(a, b): Returns ------- - boolean : Whether we return a comparing a string-like - object to a numeric array. + boolean + Whether we return a comparing a string-like object to a numeric array. Examples -------- @@ -1332,8 +1352,8 @@ def is_datetimelike_v_numeric(a, b): Returns ------- - boolean : Whether we return a comparing a datetime-like - to a numeric object. + boolean + Whether we return a comparing a datetime-like to a numeric object. Examples -------- @@ -1388,8 +1408,8 @@ def is_datetimelike_v_object(a, b): Returns ------- - boolean : Whether we return a comparing a datetime-like - to an object instance. + boolean + Whether we return a comparing a datetime-like to an object instance. Examples -------- @@ -1442,7 +1462,8 @@ def needs_i8_conversion(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype should be converted to int64. + boolean + Whether or not the array or dtype should be converted to int64. Examples -------- @@ -1480,7 +1501,8 @@ def is_numeric_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of a numeric dtype. + boolean + Whether or not the array or dtype is of a numeric dtype. Examples -------- @@ -1524,7 +1546,8 @@ def is_string_like_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of the string dtype. + boolean + Whether or not the array or dtype is of the string dtype. Examples -------- @@ -1555,7 +1578,8 @@ def is_float_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of a float dtype. + boolean + Whether or not the array or dtype is of a float dtype. Examples -------- @@ -1586,7 +1610,8 @@ def is_bool_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of a boolean dtype. + boolean + Whether or not the array or dtype is of a boolean dtype. Notes ----- @@ -1655,8 +1680,8 @@ def is_extension_type(arr): Returns ------- - boolean : Whether or not the array-like is of a pandas - extension class instance. + boolean + Whether or not the array-like is of a pandas extension class instance. Examples -------- @@ -1760,7 +1785,8 @@ def is_complex_dtype(arr_or_dtype): Returns ------- - boolean : Whether or not the array or dtype is of a compex dtype. + boolean + Whether or not the array or dtype is of a compex dtype. Examples -------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6bab66cef9a11..afc4194e71eb1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7498,7 +7498,8 @@ def f(x): if filter_type is None or filter_type == 'numeric': data = self._get_numeric_data() elif filter_type == 'bool': - data = self + # GH 25101, # GH 24434 + data = self._get_bool_data() if axis == 0 else self else: # pragma: no cover msg = ("Generating numeric_only data with filter_type {f}" "not supported.".format(f=filter_type)) @@ -7767,10 +7768,10 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, ------- quantiles : Series or DataFrame - - If ``q`` is an array, a DataFrame will be returned where the + If ``q`` is an array, a DataFrame will be returned where the index is ``q``, the columns are the columns of self, and the values are the quantiles. - - If ``q`` is a float, a Series will be returned where the + If ``q`` is a float, a Series will be returned where the index is the columns of self and the values are the quantiles. See Also diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 313fd340aeab8..4ddc9c9d8fea2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4951,10 +4951,10 @@ def pipe(self, func, *args, **kwargs): Returns ------- DataFrame, Series or scalar - if DataFrame.agg is called with a single function, returns a Series - if DataFrame.agg is called with several functions, returns a DataFrame - if Series.agg is called with single function, returns a scalar - if Series.agg is called with several functions, returns a Series + If DataFrame.agg is called with a single function, returns a Series + If DataFrame.agg is called with several functions, returns a DataFrame + If Series.agg is called with single function, returns a scalar + If Series.agg is called with several functions, returns a Series %(see_also)s @@ -6878,10 +6878,10 @@ def asof(self, where, subset=None): ------- scalar, Series, or DataFrame - * scalar : when `self` is a Series and `where` is a scalar - * Series: when `self` is a Series and `where` is an array-like, + Scalar : when `self` is a Series and `where` is a scalar + Series: when `self` is a Series and `where` is an array-like, or when `self` is a DataFrame and `where` is a scalar - * DataFrame : when `self` is a DataFrame and `where` is an + DataFrame : when `self` is a DataFrame and `where` is an array-like See Also @@ -9994,8 +9994,7 @@ def _add_numeric_operations(cls): cls, 'all', name, name2, axis_descr, _all_desc, nanops.nanall, _all_see_also, _all_examples, empty_value=True) - @Substitution(outname='mad', - desc="Return the mean absolute deviation of the values " + @Substitution(desc="Return the mean absolute deviation of the values " "for the requested axis.", name1=name, name2=name2, axis_descr=axis_descr, min_count='', see_also='', examples='') @@ -10036,8 +10035,7 @@ def mad(self, axis=None, skipna=None, level=None): "ddof argument", nanops.nanstd) - @Substitution(outname='compounded', - desc="Return the compound percentage of the values for " + @Substitution(desc="Return the compound percentage of the values for " "the requested axis.", name1=name, name2=name2, axis_descr=axis_descr, min_count='', see_also='', examples='') @@ -10275,7 +10273,7 @@ def _doc_parms(cls): Returns ------- -%(outname)s : %(name1)s or %(name2)s (if level specified) +%(name1)s or %(name2)s (if level specified) %(see_also)s %(examples)s\ """ @@ -10301,7 +10299,7 @@ def _doc_parms(cls): Returns ------- -%(outname)s : %(name1)s or %(name2)s (if level specified)\n""" +%(name1)s or %(name2)s (if level specified)\n""" _bool_doc = """ %(desc)s @@ -10420,7 +10418,7 @@ def _doc_parms(cls): Returns ------- -%(outname)s : %(name1)s or %(name2)s\n +%(name1)s or %(name2)s\n See Also -------- core.window.Expanding.%(accum_func_name)s : Similar functionality @@ -10913,7 +10911,7 @@ def _doc_parms(cls): def _make_min_count_stat_function(cls, name, name1, name2, axis_descr, desc, f, see_also='', examples=''): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, min_count=_min_count_stub, see_also=see_also, examples=examples) @Appender(_num_doc) @@ -10941,7 +10939,7 @@ def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f, see_also='', examples=''): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, min_count='', see_also=see_also, examples=examples) @Appender(_num_doc) @@ -10965,7 +10963,7 @@ def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) @Appender(_num_ddof_doc) def stat_func(self, axis=None, skipna=None, level=None, ddof=1, @@ -10986,7 +10984,7 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func, accum_func_name, mask_a, mask_b, examples): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, accum_func_name=accum_func_name, examples=examples) @Appender(_cnum_doc) @@ -11021,7 +11019,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f, see_also, examples, empty_value): - @Substitution(outname=name, desc=desc, name1=name1, name2=name2, + @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, see_also=see_also, examples=examples, empty_value=empty_value) @Appender(_bool_doc) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index c43469d3c3a81..602e11a08b4ed 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -140,7 +140,7 @@ def to_pydatetime(self): Returns ------- numpy.ndarray - object dtype array containing native Python datetime objects. + Object dtype array containing native Python datetime objects. See Also -------- @@ -208,7 +208,7 @@ def to_pytimedelta(self): Returns ------- a : numpy.ndarray - 1D array containing data with `datetime.timedelta` type. + Array of 1D containing data with `datetime.timedelta` type. See Also -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 686e53d44f4e5..664bf3a4040d8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4259,7 +4259,7 @@ def shift(self, periods=1, freq=None): Returns ------- pandas.Index - shifted index + Shifted index See Also -------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 14975dbbefa63..e2237afbcac0f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1391,7 +1391,7 @@ def get_level_values(self, level): Returns ------- values : Index - ``values`` is a level of this MultiIndex converted to + Values is a level of this MultiIndex converted to a single :class:`Index` (or subclass thereof). Examples diff --git a/pandas/core/ops.py b/pandas/core/ops.py index f6fc316437db7..dbdabecafae3a 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -459,14 +459,15 @@ def _get_op_name(op, special): Fill existing missing (NaN) values, and any new element needed for successful Series alignment, with this value before computation. If data in both corresponding Series locations is missing - the result will be missing + the result will be missing. level : int or name Broadcast across a level, matching Index values on the - passed MultiIndex level + passed MultiIndex level. Returns ------- -result : Series +Series + The result of the operation. See Also -------- @@ -495,6 +496,27 @@ def _get_op_name(op, special): d 1.0 e NaN dtype: float64 +>>> a.subtract(b, fill_value=0) +a 0.0 +b 1.0 +c 1.0 +d -1.0 +e NaN +dtype: float64 +>>> a.multiply(b) +a 1.0 +b NaN +c NaN +d NaN +e NaN +dtype: float64 +>>> a.divide(b, fill_value=0) +a 1.0 +b inf +c inf +d 0.0 +e NaN +dtype: float64 """ _arith_doc_FRAME = """ diff --git a/pandas/core/panel.py b/pandas/core/panel.py index e0da5af47accc..c8afafde48ac2 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -802,7 +802,7 @@ def major_xs(self, key): Returns ------- y : DataFrame - index -> minor axis, columns -> items + Index -> minor axis, columns -> items Notes ----- @@ -826,7 +826,7 @@ def minor_xs(self, key): Returns ------- y : DataFrame - index -> major axis, columns -> items + Index -> major axis, columns -> items Notes ----- diff --git a/pandas/core/series.py b/pandas/core/series.py index 63d1aec10e2ef..fb84a36d215e5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2025,7 +2025,7 @@ def quantile(self, q=0.5, interpolation='linear'): Returns ------- quantile : float or Series - if ``q`` is an array, a Series will be returned where the + If ``q`` is an array, a Series will be returned where the index is ``q`` and the values are the quantiles. See Also diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 61862ee0028d3..790cff95827fc 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -433,7 +433,7 @@ def render(self, **kwargs): Returns ------- rendered : str - the rendered HTML + The rendered HTML Notes ----- @@ -1223,7 +1223,7 @@ def from_custom_template(cls, searchpath, name): Returns ------- MyStyler : subclass of Styler - has the correct ``env`` and ``template`` class attributes set. + Has the correct ``env`` and ``template`` class attributes set. """ loader = ChoiceLoader([ FileSystemLoader(searchpath), diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4e103482f48a2..2ab6ddb5b25c7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3288,7 +3288,7 @@ def get_attrs(self): self.nan_rep = getattr(self.attrs, 'nan_rep', None) self.encoding = _ensure_encoding( getattr(self.attrs, 'encoding', None)) - self.errors = getattr(self.attrs, 'errors', 'strict') + self.errors = _ensure_decoded(getattr(self.attrs, 'errors', 'strict')) self.levels = getattr( self.attrs, 'levels', None) or [] self.index_axes = [ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 386e5f57617cf..2e690ebbfa121 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -231,9 +231,9 @@ def assert_bool_op_api(opname, bool_frame_with_na, float_string_frame, getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) -class TestDataFrameAnalytics(): +class TestDataFrameAnalytics(object): - # ---------------------------------------------------------------------= + # --------------------------------------------------------------------- # Correlation and covariance @td.skip_if_no_scipy @@ -502,6 +502,9 @@ def test_corrwith_kendall(self): expected = Series(np.ones(len(result))) tm.assert_series_equal(result, expected) + # --------------------------------------------------------------------- + # Describe + def test_bool_describe_in_mixed_frame(self): df = DataFrame({ 'string_data': ['a', 'b', 'c', 'd', 'e'], @@ -693,82 +696,113 @@ def test_describe_tz_values(self, tz_naive_fixture): result = df.describe(include='all') tm.assert_frame_equal(result, expected) - def test_reduce_mixed_frame(self): - # GH 6806 - df = DataFrame({ - 'bool_data': [True, True, False, False, False], - 'int_data': [10, 20, 30, 40, 50], - 'string_data': ['a', 'b', 'c', 'd', 'e'], - }) - df.reindex(columns=['bool_data', 'int_data', 'string_data']) - test = df.sum(axis=0) - tm.assert_numpy_array_equal(test.values, - np.array([2, 150, 'abcde'], dtype=object)) - tm.assert_series_equal(test, df.T.sum(axis=1)) + # --------------------------------------------------------------------- + # Reductions - def test_count(self, float_frame_with_na, float_frame, float_string_frame): - f = lambda s: notna(s).sum() - assert_stat_op_calc('count', f, float_frame_with_na, has_skipna=False, - check_dtype=False, check_dates=True) + def test_stat_op_api(self, float_frame, float_string_frame): assert_stat_op_api('count', float_frame, float_string_frame, has_numeric_only=True) + assert_stat_op_api('sum', float_frame, float_string_frame, + has_numeric_only=True) - # corner case - frame = DataFrame() - ct1 = frame.count(1) - assert isinstance(ct1, Series) + assert_stat_op_api('nunique', float_frame, float_string_frame) + assert_stat_op_api('mean', float_frame, float_string_frame) + assert_stat_op_api('product', float_frame, float_string_frame) + assert_stat_op_api('median', float_frame, float_string_frame) + assert_stat_op_api('min', float_frame, float_string_frame) + assert_stat_op_api('max', float_frame, float_string_frame) + assert_stat_op_api('mad', float_frame, float_string_frame) + assert_stat_op_api('var', float_frame, float_string_frame) + assert_stat_op_api('std', float_frame, float_string_frame) + assert_stat_op_api('sem', float_frame, float_string_frame) + assert_stat_op_api('median', float_frame, float_string_frame) - ct2 = frame.count(0) - assert isinstance(ct2, Series) + try: + from scipy.stats import skew, kurtosis # noqa:F401 + assert_stat_op_api('skew', float_frame, float_string_frame) + assert_stat_op_api('kurt', float_frame, float_string_frame) + except ImportError: + pass - # GH 423 - df = DataFrame(index=lrange(10)) - result = df.count(1) - expected = Series(0, index=df.index) - tm.assert_series_equal(result, expected) + def test_stat_op_calc(self, float_frame_with_na, mixed_float_frame): - df = DataFrame(columns=lrange(10)) - result = df.count(0) - expected = Series(0, index=df.columns) - tm.assert_series_equal(result, expected) + def count(s): + return notna(s).sum() - df = DataFrame() - result = df.count() - expected = Series(0, index=[]) - tm.assert_series_equal(result, expected) + def nunique(s): + return len(algorithms.unique1d(s.dropna())) + + def mad(x): + return np.abs(x - x.mean()).mean() - def test_nunique(self, float_frame_with_na, float_frame, - float_string_frame): - f = lambda s: len(algorithms.unique1d(s.dropna())) - assert_stat_op_calc('nunique', f, float_frame_with_na, + def var(x): + return np.var(x, ddof=1) + + def std(x): + return np.std(x, ddof=1) + + def sem(x): + return np.std(x, ddof=1) / np.sqrt(len(x)) + + def skewness(x): + from scipy.stats import skew # noqa:F811 + if len(x) < 3: + return np.nan + return skew(x, bias=False) + + def kurt(x): + from scipy.stats import kurtosis # noqa:F811 + if len(x) < 4: + return np.nan + return kurtosis(x, bias=False) + + assert_stat_op_calc('nunique', nunique, float_frame_with_na, has_skipna=False, check_dtype=False, check_dates=True) - assert_stat_op_api('nunique', float_frame, float_string_frame) - df = DataFrame({'A': [1, 1, 1], - 'B': [1, 2, 3], - 'C': [1, np.nan, 3]}) - tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2})) - tm.assert_series_equal(df.nunique(dropna=False), - Series({'A': 1, 'B': 3, 'C': 3})) - tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) - tm.assert_series_equal(df.nunique(axis=1, dropna=False), - Series({0: 1, 1: 3, 2: 2})) - - def test_sum(self, float_frame_with_na, mixed_float_frame, - float_frame, float_string_frame): - assert_stat_op_api('sum', float_frame, float_string_frame, - has_numeric_only=True) - assert_stat_op_calc('sum', np.sum, float_frame_with_na, - skipna_alternative=np.nansum) # mixed types (with upcasting happening) assert_stat_op_calc('sum', np.sum, mixed_float_frame.astype('float32'), check_dtype=False, check_less_precise=True) + assert_stat_op_calc('sum', np.sum, float_frame_with_na, + skipna_alternative=np.nansum) + assert_stat_op_calc('mean', np.mean, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('product', np.prod, float_frame_with_na) + + assert_stat_op_calc('mad', mad, float_frame_with_na) + assert_stat_op_calc('var', var, float_frame_with_na) + assert_stat_op_calc('std', std, float_frame_with_na) + assert_stat_op_calc('sem', sem, float_frame_with_na) + + assert_stat_op_calc('count', count, float_frame_with_na, + has_skipna=False, check_dtype=False, + check_dates=True) + + try: + from scipy import skew, kurtosis # noqa:F401 + assert_stat_op_calc('skew', skewness, float_frame_with_na) + assert_stat_op_calc('kurt', kurt, float_frame_with_na) + except ImportError: + pass + + # TODO: Ensure warning isn't emitted in the first place + @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") + def test_median(self, float_frame_with_na, int_frame): + def wrapper(x): + if isna(x).any(): + return np.nan + return np.median(x) + + assert_stat_op_calc('median', wrapper, float_frame_with_na, + check_dates=True) + assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, + check_dates=True) + @pytest.mark.parametrize('method', ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']) def test_stat_operators_attempt_obj_array(self, method): - # GH 676 + # GH#676 data = { 'a': [-0.00049987540199591344, -0.0016467257772919831, 0.00067695870775883013], @@ -789,10 +823,44 @@ def test_stat_operators_attempt_obj_array(self, method): if method in ['sum', 'prod']: tm.assert_series_equal(result, expected) - def test_mean(self, float_frame_with_na, float_frame, float_string_frame): - assert_stat_op_calc('mean', np.mean, float_frame_with_na, - check_dates=True) - assert_stat_op_api('mean', float_frame, float_string_frame) + @pytest.mark.parametrize('op', ['mean', 'std', 'var', + 'skew', 'kurt', 'sem']) + def test_mixed_ops(self, op): + # GH#16116 + df = DataFrame({'int': [1, 2, 3, 4], + 'float': [1., 2., 3., 4.], + 'str': ['a', 'b', 'c', 'd']}) + + result = getattr(df, op)() + assert len(result) == 2 + + with pd.option_context('use_bottleneck', False): + result = getattr(df, op)() + assert len(result) == 2 + + def test_reduce_mixed_frame(self): + # GH 6806 + df = DataFrame({ + 'bool_data': [True, True, False, False, False], + 'int_data': [10, 20, 30, 40, 50], + 'string_data': ['a', 'b', 'c', 'd', 'e'], + }) + df.reindex(columns=['bool_data', 'int_data', 'string_data']) + test = df.sum(axis=0) + tm.assert_numpy_array_equal(test.values, + np.array([2, 150, 'abcde'], dtype=object)) + tm.assert_series_equal(test, df.T.sum(axis=1)) + + def test_nunique(self): + df = DataFrame({'A': [1, 1, 1], + 'B': [1, 2, 3], + 'C': [1, np.nan, 3]}) + tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2})) + tm.assert_series_equal(df.nunique(dropna=False), + Series({'A': 1, 'B': 3, 'C': 3})) + tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) + tm.assert_series_equal(df.nunique(axis=1, dropna=False), + Series({0: 1, 1: 3, 2: 2})) @pytest.mark.parametrize('tz', [None, 'UTC']) def test_mean_mixed_datetime_numeric(self, tz): @@ -813,103 +881,7 @@ def test_mean_excludeds_datetimes(self, tz): expected = pd.Series() tm.assert_series_equal(result, expected) - def test_product(self, float_frame_with_na, float_frame, - float_string_frame): - assert_stat_op_calc('product', np.prod, float_frame_with_na) - assert_stat_op_api('product', float_frame, float_string_frame) - - # TODO: Ensure warning isn't emitted in the first place - @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median(self, float_frame_with_na, float_frame, - float_string_frame): - def wrapper(x): - if isna(x).any(): - return np.nan - return np.median(x) - - assert_stat_op_calc('median', wrapper, float_frame_with_na, - check_dates=True) - assert_stat_op_api('median', float_frame, float_string_frame) - - def test_min(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - assert_stat_op_calc('min', np.min, float_frame_with_na, - check_dates=True) - assert_stat_op_calc('min', np.min, int_frame) - assert_stat_op_api('min', float_frame, float_string_frame) - - def test_cummin(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummin = datetime_frame.cummin() - expected = datetime_frame.apply(Series.cummin) - tm.assert_frame_equal(cummin, expected) - - # axis = 1 - cummin = datetime_frame.cummin(axis=1) - expected = datetime_frame.apply(Series.cummin, axis=1) - tm.assert_frame_equal(cummin, expected) - - # it works - df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) - result = df.cummin() # noqa - - # fix issue - cummin_xs = datetime_frame.cummin(axis=1) - assert np.shape(cummin_xs) == np.shape(datetime_frame) - - def test_cummax(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummax = datetime_frame.cummax() - expected = datetime_frame.apply(Series.cummax) - tm.assert_frame_equal(cummax, expected) - - # axis = 1 - cummax = datetime_frame.cummax(axis=1) - expected = datetime_frame.apply(Series.cummax, axis=1) - tm.assert_frame_equal(cummax, expected) - - # it works - df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) - result = df.cummax() # noqa - - # fix issue - cummax_xs = datetime_frame.cummax(axis=1) - assert np.shape(cummax_xs) == np.shape(datetime_frame) - - def test_max(self, float_frame_with_na, int_frame, - float_frame, float_string_frame): - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - assert_stat_op_calc('max', np.max, float_frame_with_na, - check_dates=True) - assert_stat_op_calc('max', np.max, int_frame) - assert_stat_op_api('max', float_frame, float_string_frame) - - def test_mad(self, float_frame_with_na, float_frame, float_string_frame): - f = lambda x: np.abs(x - x.mean()).mean() - assert_stat_op_calc('mad', f, float_frame_with_na) - assert_stat_op_api('mad', float_frame, float_string_frame) - - def test_var_std(self, float_frame_with_na, datetime_frame, float_frame, - float_string_frame): - alt = lambda x: np.var(x, ddof=1) - assert_stat_op_calc('var', alt, float_frame_with_na) - assert_stat_op_api('var', float_frame, float_string_frame) - - alt = lambda x: np.std(x, ddof=1) - assert_stat_op_calc('std', alt, float_frame_with_na) - assert_stat_op_api('std', float_frame, float_string_frame) - + def test_var_std(self, datetime_frame): result = datetime_frame.std(ddof=4) expected = datetime_frame.apply(lambda x: x.std(ddof=4)) tm.assert_almost_equal(result, expected) @@ -952,79 +924,7 @@ def test_numeric_only_flag(self, meth): pytest.raises(TypeError, lambda: getattr(df2, meth)( axis=1, numeric_only=False)) - @pytest.mark.parametrize('op', ['mean', 'std', 'var', - 'skew', 'kurt', 'sem']) - def test_mixed_ops(self, op): - # GH 16116 - df = DataFrame({'int': [1, 2, 3, 4], - 'float': [1., 2., 3., 4.], - 'str': ['a', 'b', 'c', 'd']}) - - result = getattr(df, op)() - assert len(result) == 2 - - with pd.option_context('use_bottleneck', False): - result = getattr(df, op)() - assert len(result) == 2 - - def test_cumsum(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumsum = datetime_frame.cumsum() - expected = datetime_frame.apply(Series.cumsum) - tm.assert_frame_equal(cumsum, expected) - - # axis = 1 - cumsum = datetime_frame.cumsum(axis=1) - expected = datetime_frame.apply(Series.cumsum, axis=1) - tm.assert_frame_equal(cumsum, expected) - - # works - df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) - result = df.cumsum() # noqa - - # fix issue - cumsum_xs = datetime_frame.cumsum(axis=1) - assert np.shape(cumsum_xs) == np.shape(datetime_frame) - - def test_cumprod(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumprod = datetime_frame.cumprod() - expected = datetime_frame.apply(Series.cumprod) - tm.assert_frame_equal(cumprod, expected) - - # axis = 1 - cumprod = datetime_frame.cumprod(axis=1) - expected = datetime_frame.apply(Series.cumprod, axis=1) - tm.assert_frame_equal(cumprod, expected) - - # fix issue - cumprod_xs = datetime_frame.cumprod(axis=1) - assert np.shape(cumprod_xs) == np.shape(datetime_frame) - - # ints - df = datetime_frame.fillna(0).astype(int) - df.cumprod(0) - df.cumprod(1) - - # ints32 - df = datetime_frame.fillna(0).astype(np.int32) - df.cumprod(0) - df.cumprod(1) - - def test_sem(self, float_frame_with_na, datetime_frame, - float_frame, float_string_frame): - alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) - assert_stat_op_calc('sem', alt, float_frame_with_na) - assert_stat_op_api('sem', float_frame, float_string_frame) - + def test_sem(self, datetime_frame): result = datetime_frame.sem(ddof=4) expected = datetime_frame.apply( lambda x: x.std(ddof=4) / np.sqrt(len(x))) @@ -1039,29 +939,7 @@ def test_sem(self, float_frame_with_na, datetime_frame, assert not (result < 0).any() @td.skip_if_no_scipy - def test_skew(self, float_frame_with_na, float_frame, float_string_frame): - from scipy.stats import skew - - def alt(x): - if len(x) < 3: - return np.nan - return skew(x, bias=False) - - assert_stat_op_calc('skew', alt, float_frame_with_na) - assert_stat_op_api('skew', float_frame, float_string_frame) - - @td.skip_if_no_scipy - def test_kurt(self, float_frame_with_na, float_frame, float_string_frame): - from scipy.stats import kurtosis - - def alt(x): - if len(x) < 4: - return np.nan - return kurtosis(x, bias=False) - - assert_stat_op_calc('kurt', alt, float_frame_with_na) - assert_stat_op_api('kurt', float_frame, float_string_frame) - + def test_kurt(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], @@ -1323,20 +1201,146 @@ def test_stats_mixed_type(self, float_string_frame): float_string_frame.mean(1) float_string_frame.skew(1) - # TODO: Ensure warning isn't emitted in the first place - @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") - def test_median_corner(self, int_frame, float_frame, float_string_frame): - def wrapper(x): - if isna(x).any(): - return np.nan - return np.median(x) + def test_sum_bools(self): + df = DataFrame(index=lrange(1), columns=lrange(10)) + bools = isna(df) + assert bools.sum(axis=1)[0] == 10 - assert_stat_op_calc('median', wrapper, int_frame, check_dtype=False, - check_dates=True) - assert_stat_op_api('median', float_frame, float_string_frame) + # --------------------------------------------------------------------- + # Cumulative Reductions - cumsum, cummax, ... + + def test_cumsum_corner(self): + dm = DataFrame(np.arange(20).reshape(4, 5), + index=lrange(4), columns=lrange(5)) + # ?(wesm) + result = dm.cumsum() # noqa + + def test_cumsum(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumsum = datetime_frame.cumsum() + expected = datetime_frame.apply(Series.cumsum) + tm.assert_frame_equal(cumsum, expected) + + # axis = 1 + cumsum = datetime_frame.cumsum(axis=1) + expected = datetime_frame.apply(Series.cumsum, axis=1) + tm.assert_frame_equal(cumsum, expected) + + # works + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) + result = df.cumsum() # noqa + + # fix issue + cumsum_xs = datetime_frame.cumsum(axis=1) + assert np.shape(cumsum_xs) == np.shape(datetime_frame) + + def test_cumprod(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumprod = datetime_frame.cumprod() + expected = datetime_frame.apply(Series.cumprod) + tm.assert_frame_equal(cumprod, expected) + + # axis = 1 + cumprod = datetime_frame.cumprod(axis=1) + expected = datetime_frame.apply(Series.cumprod, axis=1) + tm.assert_frame_equal(cumprod, expected) + + # fix issue + cumprod_xs = datetime_frame.cumprod(axis=1) + assert np.shape(cumprod_xs) == np.shape(datetime_frame) + + # ints + df = datetime_frame.fillna(0).astype(int) + df.cumprod(0) + df.cumprod(1) + # ints32 + df = datetime_frame.fillna(0).astype(np.int32) + df.cumprod(0) + df.cumprod(1) + + def test_cummin(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummin = datetime_frame.cummin() + expected = datetime_frame.apply(Series.cummin) + tm.assert_frame_equal(cummin, expected) + + # axis = 1 + cummin = datetime_frame.cummin(axis=1) + expected = datetime_frame.apply(Series.cummin, axis=1) + tm.assert_frame_equal(cummin, expected) + + # it works + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) + result = df.cummin() # noqa + + # fix issue + cummin_xs = datetime_frame.cummin(axis=1) + assert np.shape(cummin_xs) == np.shape(datetime_frame) + + def test_cummax(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummax = datetime_frame.cummax() + expected = datetime_frame.apply(Series.cummax) + tm.assert_frame_equal(cummax, expected) + + # axis = 1 + cummax = datetime_frame.cummax(axis=1) + expected = datetime_frame.apply(Series.cummax, axis=1) + tm.assert_frame_equal(cummax, expected) + + # it works + df = DataFrame({'A': np.arange(20)}, index=np.arange(20)) + result = df.cummax() # noqa + + # fix issue + cummax_xs = datetime_frame.cummax(axis=1) + assert np.shape(cummax_xs) == np.shape(datetime_frame) + + # --------------------------------------------------------------------- # Miscellanea + def test_count(self): + # corner case + frame = DataFrame() + ct1 = frame.count(1) + assert isinstance(ct1, Series) + + ct2 = frame.count(0) + assert isinstance(ct2, Series) + + # GH#423 + df = DataFrame(index=lrange(10)) + result = df.count(1) + expected = Series(0, index=df.index) + tm.assert_series_equal(result, expected) + + df = DataFrame(columns=lrange(10)) + result = df.count(0) + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + df = DataFrame() + result = df.count() + expected = Series(0, index=[]) + tm.assert_series_equal(result, expected) + def test_count_objects(self, float_string_frame): dm = DataFrame(float_string_frame._series) df = DataFrame(float_string_frame._series) @@ -1344,17 +1348,23 @@ def test_count_objects(self, float_string_frame): tm.assert_series_equal(dm.count(), df.count()) tm.assert_series_equal(dm.count(1), df.count(1)) - def test_cumsum_corner(self): - dm = DataFrame(np.arange(20).reshape(4, 5), - index=lrange(4), columns=lrange(5)) - # ?(wesm) - result = dm.cumsum() # noqa + def test_pct_change(self): + # GH#11150 + pnl = DataFrame([np.arange(0, 40, 10), + np.arange(0, 40, 10), + np.arange(0, 40, 10)]).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 - def test_sum_bools(self): - df = DataFrame(index=lrange(1), columns=lrange(10)) - bools = isna(df) - assert bools.sum(axis=1)[0] == 10 + for axis in range(2): + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( + axis=axis) - 1 + result = pnl.pct_change(axis=axis, fill_method='pad') + tm.assert_frame_equal(result, expected) + + # ---------------------------------------------------------------------- # Index of max / min def test_idxmin(self, float_frame, int_frame): @@ -1442,6 +1452,26 @@ def test_any_datetime(self): expected = Series([True, True, True, False]) tm.assert_series_equal(result, expected) + def test_any_all_bool_only(self): + + # GH 25101 + df = DataFrame({"col1": [1, 2, 3], + "col2": [4, 5, 6], + "col3": [None, None, None]}) + + result = df.all(bool_only=True) + expected = Series(dtype=np.bool) + tm.assert_series_equal(result, expected) + + df = DataFrame({"col1": [1, 2, 3], + "col2": [4, 5, 6], + "col3": [None, None, None], + "col4": [False, False, True]}) + + result = df.all(bool_only=True) + expected = Series({"col4": False}) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('func, data, expected', [ (np.any, {}, False), (np.all, {}, True), @@ -1680,7 +1710,9 @@ def test_isin_empty_datetimelike(self): result = df1_td.isin(df3) tm.assert_frame_equal(result, expected) + # --------------------------------------------------------------------- # Rounding + def test_round(self): # GH 2665 @@ -1868,22 +1900,9 @@ def test_round_nonunique_categorical(self): tm.assert_frame_equal(result, expected) - def test_pct_change(self): - # GH 11150 - pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( - 0, 40, 10)]).astype(np.float64) - pnl.iat[1, 0] = np.nan - pnl.iat[1, 1] = np.nan - pnl.iat[2, 3] = 60 - - for axis in range(2): - expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( - axis=axis) - 1 - result = pnl.pct_change(axis=axis, fill_method='pad') - - tm.assert_frame_equal(result, expected) - + # --------------------------------------------------------------------- # Clip + def test_clip(self, float_frame): median = float_frame.median().median() original = float_frame.copy() @@ -2056,7 +2075,9 @@ def test_clip_with_na_args(self, float_frame): 'col_2': [np.nan, np.nan, np.nan]}) tm.assert_frame_equal(result, expected) + # --------------------------------------------------------------------- # Matrix-like + def test_dot(self): a = DataFrame(np.random.randn(3, 4), index=['a', 'b', 'c'], columns=['p', 'q', 'r', 's']) diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 new file mode 100644 index 0000000000000..3863d714a315b Binary files /dev/null and b/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 differ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 517a3e059469c..9430011288f27 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4540,7 +4540,7 @@ def test_pytables_native2_read(self, datapath): def test_legacy_table_fixed_format_read_py2(self, datapath): # GH 24510 - # legacy table with fixed format written en Python 2 + # legacy table with fixed format written in Python 2 with ensure_clean_store( datapath('io', 'data', 'legacy_hdf', 'legacy_table_fixed_py2.h5'), @@ -4552,6 +4552,21 @@ def test_legacy_table_fixed_format_read_py2(self, datapath): name='INDEX_NAME')) assert_frame_equal(expected, result) + def test_legacy_table_read_py2(self, datapath): + # issue: 24925 + # legacy table written in Python 2 + with ensure_clean_store( + datapath('io', 'data', 'legacy_hdf', + 'legacy_table_py2.h5'), + mode='r') as store: + result = store.select('table') + + expected = pd.DataFrame({ + "a": ["a", "b"], + "b": [2, 3] + }) + assert_frame_equal(expected, result) + def test_legacy_table_read(self, datapath): # legacy table types with ensure_clean_store(