From 7959eb65635c41dffe93383d2b2f94540631bc44 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 30 Oct 2018 10:26:16 -0500 Subject: [PATCH 01/17] API: Public data attributes for EA-backed containers This adds two new methods for working with EA-backed Series / Index. - `.array -> Union[ExtensionArray, ndarray]`: the actual backing array - `.to_numpy() -> ndarray`: A NumPy representation of the data `.array` is always a reference to the actual data stored in the container. Updating it inplace (not recommended) will be reflected in the Series (or Index for that matter, so really not recommended). `to_numpy()` may (or may not) require data copying / coercion. Closes https://github.com/pandas-dev/pandas/issues/19954 --- pandas/core/base.py | 91 ++++++++++++++++++++++++++++++++++++ pandas/core/indexes/base.py | 3 +- pandas/core/indexes/multi.py | 20 ++++++++ pandas/tests/test_base.py | 51 ++++++++++++++++++++ 4 files changed, 164 insertions(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index de368f52b6f00..a6d5d6a86ca27 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -765,6 +765,97 @@ def base(self): FutureWarning, stacklevel=2) return self.values.base + @property + def array(self): + # type: () -> Union[np.ndarray, ExtensionArray] + """The actual Array backing this Series or Index. + + Returns + ------- + Union[ndarray, ExtensionArray] + This is the actual array stored within this object. + + Notes + ----- + This table lays out the different array types for each extension + dtype within pandas. + + ================== ============================= + dtype array type + ================== ============================= + category Categorical + period PeriodArray + interval IntervalArray + IntegerNA IntegerArray + datetime64[ns, tz] datetime64[ns]? DatetimeArray + ================== ============================= + + For any 3rd-party extension types, the array type will be an + ExtensionArray. 
+ + All remaining arrays (ndarrays), ``.array`` will be the ndarray + stored within. + + See Also + -------- + to_numpy : Similar method that always returns a NumPy array. + + Examples + -------- + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.array + [a, b, a] + Categories (2, object): [a, b] + """ + return self._values + + def to_numpy(self): + """A NumPy array representing the values in this Series or Index. + + The returned array will be the same up to equality (values equal + in `self` will be equal in the returned array; likewise for values + that are not equal). + + Returns + ------- + numpy.ndarray + An ndarray with + + Notes + ----- + For NumPy arrays, this will be a reference to the actual data stored + in this Series or Index. + + For extension types, this may involve copying data and coercing the + result to a NumPy type (possibly object), which may be expensive. + + This table lays out the different array types for each extension + dtype within pandas. + + ================== ================================ + dtype array type + ================== ================================ + category[T] ndarray[T] (same dtype as input) + period ndarray[object] (Periods) + interval ndarray[object] (Intervals) + IntegerNA IntegerArray[object] + datetime64[ns, tz] datetime64[ns]? object? + ================== ================================ + + See Also + -------- + array : Get the actual data stored within. 
+ + Examples + -------- + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.to_numpy() + array(['a', 'b', 'a'], dtype=object) + """ + if is_extension_array_dtype(self.dtype): + return np.asarray(self._values) + return self._values + @property def _ndarray_values(self): # type: () -> np.ndarray diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6e65d6899787f..b712d5b097a4a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -710,6 +710,7 @@ def values(self): @property def _values(self): # type: () -> Union[ExtensionArray, Index] + # TODO: remove in favor of .array # TODO(EA): remove index types as they become extension arrays """The best array representation. @@ -739,7 +740,7 @@ def _values(self): values _ndarray_values """ - return self.values + return self._data def get_values(self): """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c694289efc493..1999405bd5f87 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -288,6 +288,26 @@ def _verify_integrity(self, labels=None, levels=None): def levels(self): return self._levels + @property + def _values(self): + # TODO: remove + # We override here, since our parent uses _data, which we don't use. + return self.values + + @property + def array(self): + """ + Raises a ValueError for `MultiIndex` because there's no single + array backing a MultiIndex. + + Raises + ------ + ValueError + """ + msg = ("MultiIndex has no single backing array. Use " + "'MultiIndex.to_numpy()' to get a NumPy array of tuples.") + raise ValueError(msg) + @property def _is_homogeneous_type(self): """Whether the levels of a MultiIndex all have the same dtype. 
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index fe2956adc35af..4aa1217ebe9f6 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1243,3 +1243,54 @@ def test_ndarray_values(array, expected): r_values = pd.Index(array)._ndarray_values tm.assert_numpy_array_equal(l_values, r_values) tm.assert_numpy_array_equal(l_values, expected) + + +@pytest.mark.parametrize("array, attr", [ + (np.array([1, 2], dtype=np.int64), None), + (pd.Categorical(['a', 'b']), '_codes'), + (pd.core.arrays.period_array(['2000', '2001'], freq='D'), '_data'), + (pd.core.arrays.integer_array([0, np.nan]), '_data'), + (pd.core.arrays.IntervalArray.from_breaks([0, 1]), '_left'), + (pd.SparseArray([0, 1]), '_sparse_values'), + # TODO: DatetimeArray(add) +]) +@pytest.mark.parametrize('box', [pd.Series, pd.Index]) +def test_array(array, attr, box): + if array.dtype.name in ('Int64', 'Sparse[int64, 0]'): + pytest.skip("No index type for {}".format(array.dtype)) + result = box(array, copy=False).array + + if attr: + array = getattr(array, attr) + result = getattr(result, attr) + + assert result is array + + +def test_array_multiindex_raises(): + idx = pd.MultiIndex.from_product([['A'], ['a', 'b']]) + with tm.assert_raises_regex(ValueError, 'MultiIndex'): + idx.array + + +@pytest.mark.parametrize('array, expected', [ + (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)), + (pd.Categorical(['a', 'b']), np.array(['a', 'b'], dtype=object)), + (pd.core.arrays.period_array(['2000', '2001'], freq='D'), + np.array([pd.Period('2000', freq="D"), pd.Period('2001', freq='D')])), + (pd.core.arrays.integer_array([0, np.nan]), + np.array([1, np.nan], dtype=object)), + (pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]), + np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object)), + (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), + # TODO: DatetimeArray(add) +]) +@pytest.mark.parametrize('box', [pd.Series, pd.Index]) +def 
test_to_numpy(array, expected, box): + thing = box(array) + + if array.dtype.name in ('Int64', 'Sparse[int64, 0]'): + pytest.skip("No index type for {}".format(array.dtype)) + + result = thing.to_numpy() + tm.assert_numpy_array_equal(result, expected) From 5b15894dc8ac5ee5b6b581baa79b441e2ed05115 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 6 Nov 2018 16:06:54 -0600 Subject: [PATCH 02/17] update --- doc/source/dsintro.rst | 35 ++++++++++++++++++++++++++++++++- doc/source/whatsnew/v0.24.0.txt | 14 +++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index d02912294060c..27d483e9c4cee 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -137,7 +137,40 @@ However, operations such as slicing will also slice the index. s[[4, 3, 1]] np.exp(s) -We will address array-based indexing in a separate :ref:`section `. +.. note:: + + We will address array-based indexing like ``s[[4, 3, 1]]`` + in :ref:`section `. + +Like a NumPy array, a pandas Series has a :attr:`Series.dtype`. + +.. ipython:: python + + s.dtype + +This is often a NumPy dtype. However, pandas and 3rd-party libraries +extend NumPy's type system in a few places, in which case the dtype would +be a :class:`~pandas.api.extensions.ExtensionDtype`. +See :ref:`dsintro.data_type` for more. + +If you need the actual array backing a ``Series``, use :attr:`Series.array`. + +.. ipython:: python + + s.array + +Again, this is often a NumPy array, but may instead be a +:class:`~pandas.api.extensions.ExtensionArray`. See :ref:`dsintro.data_type` for more. + +While Series is ndarray-like, if you need an *actual* ndarray, then use +:meth:`Series.to_numpy`. + +.. ipython:: python + + s.to_numpy() + +Even if the Series is backed by an extension type, :meth:`Series.to_numpy` will return +a NumPy ndarray. 
Series is dict-like ~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 149d618c4a621..282dbe75389d1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -21,6 +21,20 @@ the user to override the engine's default behavior to include or omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`) - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) +.. _whatsnew_0240.values_api: + +:attr:`Series.array` and :attr:`Index.array` have been added for extracting the array backing a +``Series`` or ``Index``. Historically, this would have been done with ``series.values``, but with +``.values`` it was unclear whether the returned value would be the actual array, or some transformation +of it. + +If you need a NumPy array, use :meth:`Series.to_numpy` + +.. ipython:: python + + arr = period_array(['2000', '2001'], freq='A') + ser = pd.Series(arr) + ser.to_numpy() .. _whatsnew_0240.enhancements.extension_array_operators: From 15cc0b7f6ec92d7e239ed31f5fb7305affc4ed3d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 07:04:39 -0600 Subject: [PATCH 03/17] more notes --- doc/source/dsintro.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 38713766e4fa8..2d1ec716a1acc 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -150,8 +150,8 @@ Like a NumPy array, a pandas Series as a :attr:`Series.dtype`. This is often a NumPy dtype. However, pandas and 3rd-party libraries extend NumPy's type system in a few places, in which case the dtype would -be a :class:`~pandas.api.extensions.ExtensionDtype`. -See :ref:`dsintro.data_type` for more. +be a :class:`~pandas.api.extensions.ExtensionDtype`. 
Some examples within +pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`dsintro.data_type` for more. If you need the actual array backing a ``Series``, use :attr:`Series.array`. @@ -161,6 +161,8 @@ If you need the actual array backing a ``Series``, use :attr:`Series.array`. Again, this is often a NumPy array, but may instead be a :class:`~pandas.api.extensions.ExtensionArray`. See :ref:`dsintro.data_type` for more. +Accessing the array can be useful when you need to do some operation without the +index (to disable :ref:`automatic alignment `, for example). While Series is ndarray-like, if you need an *actual* ndarray, then use :meth:`Series.to_numpy`. @@ -169,8 +171,8 @@ While Series is ndarray-like, if you need an *actual* ndarray, then use s.to_numpy() -Even if the Series is backed by an extension type, :meth:`Series.to_numpy` will return -a NumPy ndarray. +Even if the Series is backed by a :class:`~pandas.api.extensions.ExtensionArray`, +:meth:`Series.to_numpy` will return a NumPy ndarray. Series is dict-like ~~~~~~~~~~~~~~~~~~~ @@ -650,6 +652,8 @@ slicing, see the :ref:`section on indexing `. We will address the fundamentals of reindexing / conforming to new sets of labels in the :ref:`section on reindexing `. +.. _dsintro.alignment: + Data alignment and arithmetic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 888853f484561caf3408531e4759c2b5f774152f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 07:13:55 -0600 Subject: [PATCH 04/17] update --- doc/source/whatsnew/v0.24.0.txt | 38 +++++++++++++++++++++++++++------ pandas/tests/test_base.py | 2 +- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 273235c332817..fd248e2afb304 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -23,17 +23,43 @@ dataframe's indexes from the resulting Parquet file. (:issue:`20768`) .. 
_whatsnew_0240.values_api: +Accessing the values in a Series or Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + :attr:`Series.array` and :attr:`Index.array` have been added for extracting the array backing a -``Series`` or ``Index``. Historically, this would have been done with ``series.values``, but with -``.values`` it was unclear whether the returned value would be the actual array, or some transformation -of it. +``Series`` or ``Index``. + +.. ipython:: python + + idx = pd.period_range('2000', periods=4) + idx.array + pd.Series(idx).array + +Historically, this would have been done with ``series.values``, but with +``.values`` it was unclear whether the returned value would be the actual array, +some transformation of it, or one of pandas custom arrays (like +``Categorical``). For example, with :class:`PeriodIndex`, ``.values`` generates +a new ndarray of period objects each time. + +.. ipython:: python + + id(idx.values) + id(idx.values) + +If you need an actual NumPy array, use :meth:`Series.to_numpy` or :meth:`Index.to_numpy`. + +.. ipython:: python + + idx.to_numpy() + pd.Series(idx).to_numpy() -If you need a NumPy array, use :meth:`Series.to_numpy` +For Series and Indexes backed by normal NumPy arrays, this will be the same thing (and the same +as ``.values``). .. ipython:: python - arr = period_array(['2000', '2001'], freq='A') - ser = pd.Series(arr) + ser = pd.Series([1, 2, 3]) + ser.array ser.to_numpy() .. 
_whatsnew_0240.enhancements.extension_array_operators: diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index d5ef2e8fd6c13..f5add26f10661 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1295,7 +1295,7 @@ def test_array(array, attr, box): def test_array_multiindex_raises(): idx = pd.MultiIndex.from_product([['A'], ['a', 'b']]) - with tm.assert_raises_regex(ValueError, 'MultiIndex'): + with pytest.raises(ValueError, match='MultiIndex'): idx.array From 7e43cf00f9d1617777f3479df940bdadb67eff53 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Nov 2018 09:07:03 -0600 Subject: [PATCH 05/17] Squashed commit of the following: commit e4b21f60c504b33f179007412cb385517f977e28 Author: Tom Augspurger Date: Mon Nov 12 16:09:58 2018 -0600 TST: Change rops tests commit e9035505281e7ec73e5dd0a372f30124b4c6327b Author: Tom Augspurger Date: Mon Nov 12 09:31:38 2018 -0600 Add note [ci skip] ***NO CI*** commit fa8934a3082300c209b4dddc9eb7e34029b6493f Author: Tom Augspurger Date: Mon Nov 12 06:16:53 2018 -0600 update errors commit 505970eb835cf1d97006451b0840c2fed50e9fbe Merge: a30bc02e5 3592a46e5 Author: Tom Augspurger Date: Mon Nov 12 05:55:31 2018 -0600 Merge remote-tracking branch 'upstream/master' into index-ndarray-data commit a30bc02e51afc8b8c152561ec11c9f0190736aec Author: Tom Augspurger Date: Sun Nov 11 15:14:46 2018 -0600 remove assert commit 1f23ebc92f6c15bf1c2dd7d7a1ddbfc5debd81a2 Author: Tom Augspurger Date: Sun Nov 11 15:01:13 2018 -0600 BUG: Ensure that Index._data is an ndarray BUG: Ensure that Index._data is an ndarray Split from https://github.com/pandas-dev/pandas/pull/23623, where it was causing issues with infer_dtype. 
--- pandas/core/indexes/base.py | 6 ++++++ pandas/tests/indexes/test_base.py | 6 ++++++ pandas/tests/indexes/test_numeric.py | 6 ++++++ pandas/tests/series/test_operators.py | 28 +++++++++++++-------------- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 77adcb978f640..3e4793c8f6b5e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -522,6 +522,12 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): values = cls(values, name=name, dtype=dtype, **kwargs)._ndarray_values + if isinstance(values, (ABCSeries, cls)): + # Index._data must always be an ndarray. + # This is no-copy for when _values is an ndarray, + # which should be always at this point. + values = np.asarray(values._values) + result = object.__new__(cls) result._data = values result.name = name diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4a3efe22926f7..054efa00cd892 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -504,6 +504,12 @@ def test_constructor_cast(self): with pytest.raises(ValueError, match=msg): Index(["a", "b", "c"], dtype=float) + def test_constructor_unwraps_index(self): + a = pd.Index([True, False]) + b = pd.Index(a) + expected = np.array([True, False], dtype=object) + tm.assert_numpy_array_equal(b._data, expected) + def test_view_with_args(self): restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index c125db16bcbff..d1ad2308d19e5 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -628,6 +628,12 @@ def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): with pytest.raises(OverflowError, match=msg): Index([-1], dtype=uint_dtype) + def test_constructor_unwraps_index(self): + idx = pd.Index([1, 2]) + result = pd.Int64Index(idx) + 
expected = np.array([1, 2], dtype='int64') + tm.assert_numpy_array_equal(result._data, expected) + def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 4cce26d135443..bcecedc2bba97 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -189,20 +189,7 @@ def test_scalar_na_logical_ops_corners(self): operator.and_, operator.or_, operator.xor, - pytest.param(ops.rand_, - marks=pytest.mark.xfail(reason="GH#22092 Index " - "implementation returns " - "Index", - raises=AssertionError, - strict=True)), - pytest.param(ops.ror_, - marks=pytest.mark.xfail(reason="GH#22092 Index " - "implementation raises", - raises=ValueError, strict=True)), - pytest.param(ops.rxor, - marks=pytest.mark.xfail(reason="GH#22092 Index " - "implementation raises", - raises=TypeError, strict=True)) + ]) def test_logical_ops_with_index(self, op): # GH#22092, GH#19792 @@ -221,6 +208,19 @@ def test_logical_ops_with_index(self, op): result = op(ser, idx2) assert_series_equal(result, expected) + @pytest.mark.parametrize("op, expected", [ + (ops.rand_, pd.Index([False, True])), + (ops.ror_, pd.Index([False, True])), + (ops.rxor, pd.Index([])), + ]) + def test_reverse_ops_with_index(self, op, expected): + # https://github.com/pandas-dev/pandas/pull/23628 + # multi-set Index ops are buggy, so let's avoid duplicates... 
+ ser = Series([True, False]) + idx = Index([False, True]) + result = op(ser, idx) + tm.assert_index_equal(result, expected) + def test_logical_ops_label_based(self): # GH#4947 # logical ops should be label based From bceb6123458c27b9a4faf43f5b1b0f6ad0cc4811 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Nov 2018 10:05:31 -0600 Subject: [PATCH 06/17] DOC: updated docs --- doc/source/10min.rst | 13 ++++++++++++- doc/source/basics.rst | 36 +++++++++++++++++++++++++----------- doc/source/categorical.rst | 4 ++-- doc/source/dsintro.rst | 29 +++++++++++++++++++++++++++++ doc/source/enhancingperf.rst | 8 +++++--- doc/source/missing_data.rst | 2 +- doc/source/text.rst | 4 ++-- pandas/core/base.py | 29 +++++++++++++++++++---------- pandas/core/indexes/base.py | 18 +++++++++++++++++- 9 files changed, 112 insertions(+), 31 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index b5938a24ce6c5..f074ac6011475 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -114,14 +114,25 @@ Here is how to view the top and bottom rows of the frame: df.head() df.tail(3) -Display the index, columns, and the underlying NumPy data: +Display the index, columns: .. ipython:: python df.index df.columns + +:attr:`DataFrame.values` gives a NumPy representation of the underlying data. +However, this can be an expensive operation when your :class:`DataFrame` has +columns with different data types. **NumPy arrays have a single dtype for +the entire array, so accessing ``df.values`` may have to coerce data**. We +recommend using ``df.values`` only when you know that your data has a single +data type. + +.. ipython:: python + df.values + :func:`~DataFrame.describe` shows a quick statistic summary of your data: .. 
ipython:: python diff --git a/doc/source/basics.rst b/doc/source/basics.rst index d19fcedf4e766..2ea7f4646b4a2 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -45,8 +45,8 @@ of elements to display is five, but you may pass a custom number. .. _basics.attrs: -Attributes and the raw ndarray(s) ---------------------------------- +Attributes and Underlying Data +------------------------------ pandas objects have a number of attributes enabling you to access the metadata @@ -64,14 +64,28 @@ Note, **these attributes can be safely assigned to**! df.columns = [x.lower() for x in df.columns] df -To get the actual data inside a data structure, one need only access the -**values** property: +Pandas objects (:class:`Index`, :class:`Series`, :class:`DataFrame`) can be +thought of as containers for arrays, which hold the actual data and do the +actual computation. For many types, the underlying array is a +:class:`numpy.ndarray`. However, pandas and 3rd party libraries may *extend* +NumPy's type system to add support for custom arrays +(see :ref:`dsintro.data_type`). + +To get the actual data inside a :class:`Index` or :class:`Series`, use +the **array** property + +.. ipython:: python + + s.array + s.index.array + +Getting the "raw data" inside a :class:`DataFrame` is possibly a bit more +complex. When your ``DataFrame`` only has a single data type for all the +columns, :attr:`DataFrame.values` will return the underlying data: .. ipython:: python - s.values - df.values - wp.values + df.values If a DataFrame or Panel contains homogeneously-typed data, the ndarray can actually be modified in-place, and the changes will be reflected in the data @@ -537,7 +551,7 @@ will exclude NAs on Series input by default: .. 
ipython:: python np.mean(df['one']) - np.mean(df['one'].values) + np.mean(df['one'].array) :meth:`Series.nunique` will return the number of unique non-NA values in a Series: @@ -834,7 +848,7 @@ Series operation on each column or row: tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], index=pd.date_range('1/1/2000', periods=10)) - tsdf.values[3:7] = np.nan + tsdf.iloc[3:7] = np.nan .. ipython:: python @@ -2270,11 +2284,11 @@ dtypes: 'float64': np.arange(4.0, 7.0), 'bool1': [True, False, True], 'bool2': [False, True, False], - 'dates': pd.date_range('now', periods=3).values, + 'dates': pd.date_range('now', periods=3), 'category': pd.Series(list("ABC")).astype('category')}) df['tdeltas'] = df.dates.diff() df['uint64'] = np.arange(3, 6).astype('u8') - df['other_dates'] = pd.date_range('20130101', periods=3).values + df['other_dates'] = pd.date_range('20130101', periods=3) df['tz_aware_dates'] = pd.date_range('20130101', periods=3, tz='US/Eastern') df diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index acab9de905540..4fe13ee0085f5 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -178,7 +178,7 @@ are consistent among all columns. To perform table-wise conversion, where all labels in the entire ``DataFrame`` are used as categories for each column, the ``categories`` parameter can be determined programmatically by - ``categories = pd.unique(df.values.ravel())``. + ``categories = pd.unique(df.to_numpy().ravel())``. If you already have ``codes`` and ``categories``, you can use the :func:`~pandas.Categorical.from_codes` constructor to save the factorize step @@ -942,7 +942,7 @@ Use ``.astype`` or ``union_categoricals`` to get ``category`` result. pd.concat([s1, s3]) pd.concat([s1, s3]).astype('category') - union_categoricals([s1.values, s3.values]) + union_categoricals([s1.array, s3.array]) Following table summarizes the results of ``Categoricals`` related concatenations. 
diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 2d1ec716a1acc..3f784f9e90371 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -859,6 +859,35 @@ completion mechanism so they can be tab-completed: In [5]: df.fo df.foo1 df.foo2 +.. _dsintro.data_type: + +Data Types +---------- + +Pandas' type system is mostly built on top of `NumPy's `__. +NumPy provides the basic arrays and data types for numeric, +string, *tz-naive* datetime, and other types of data. + +Pandas and third-party libraries *extend* NumPy's type system in a few places. +This section describes the extensions pandas has made internally. +See :ref:`extending.extension-types` for how to write your own extension that +works with pandas. See :ref:`ecosystem.extensions` for a list of third-party +libraries that have implemented an extension. + +The following table lists all of pandas' extension types. See the respective +documentation sections for more on each type. + +=================== ========================= ================== ============================= ============================= +Kind of Data Data Type Scalar Array Documentation +=================== ========================= ================== ============================= ============================= +tz-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :class:`arrays.DatetimeArray` :ref:`timeseries.timezone` +Categorical :class:`CategoricalDtype` (none) :class:`Categorical` :ref:`categorical` +period (time spans) :class:`PeriodDtype` :class:`Period` :class:`arrays.PeriodArray` :ref:`timeseries.periods` +sparse :class:`SparseDtype` (none) :class:`arrays.SparseArray` :ref:`sparse` +intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex` +nullable integer :class:`Int64Dtype`, ... 
(none) :class:`arrays.IntegerArray` :ref:`integer_na` +=================== ========================= ================== ============================= ============================= + .. _basics.panel: Panel diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 2ca8a2b7ac0f8..1c873d604cfe0 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -221,7 +221,7 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra You can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter to a Cython function. Instead pass the actual ``ndarray`` using the - ``.values`` attribute of the ``Series``. The reason is that the Cython + :meth:`Series.to_numpy`. The reason is that the Cython definition is specific to an ndarray and not the passed ``Series``. So, do not do this: @@ -230,11 +230,13 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra apply_integrate_f(df['a'], df['b'], df['N']) - But rather, use ``.values`` to get the underlying ``ndarray``: + But rather, use :meth:`Series.to_numpy` to get the underlying ``ndarray``: .. code-block:: python - apply_integrate_f(df['a'].values, df['b'].values, df['N'].values) + apply_integrate_f(df['a'].to_numpy(), + df['b'].to_numpy(), + df['N'].to_numpy()) .. note:: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 4864637691607..7b6d338ee5b6a 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -678,7 +678,7 @@ Replacing more than one value is possible by passing a list. .. 
ipython:: python - df00 = df.values[0, 0] + df00 = df.iloc[0, 0] df.replace([1.5, df00], [np.nan, 'a']) df[1].dtype diff --git a/doc/source/text.rst b/doc/source/text.rst index d01c48695d0d6..6aec9ba1917a1 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -312,8 +312,8 @@ All one-dimensional list-likes can be combined in a list-like container (includi s u - s.str.cat([u.values, - u.index.astype(str).values], na_rep='-') + s.str.cat([u.array, + u.index.astype(str).array], na_rep='-') All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None: diff --git a/pandas/core/base.py b/pandas/core/base.py index 81992e77e6fb3..0af7341c8e917 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -770,10 +770,15 @@ def array(self): # type: () -> Union[np.ndarray, ExtensionArray] """The actual Array backing this Series or Index. + This differs from ``.values``, which may require converting or + copying data. + Returns ------- Union[ndarray, ExtensionArray] - This is the actual array stored within this object. + This is the actual array stored within this object. This differs + from ``.values`` which may require converting the data + to a different form. We recommend using : Notes ----- @@ -793,12 +798,13 @@ def array(self): For any 3rd-party extension types, the array type will be an ExtensionArray. - All remaining arrays (ndarrays), ``.array`` will be the ndarray + For all remaining dtypes ``.array`` will be the :class:`numpy.ndarray` stored within. See Also -------- - to_numpy : Similar method that always returns a NumPy array. + Index.to_numpy : Similar method that always returns a NumPy array. + Series.to_numpy : Similar method that always returns a NumPy array. Examples -------- @@ -819,15 +825,17 @@ def to_numpy(self): Returns ------- numpy.ndarray - An ndarray with Notes ----- - For NumPy arrays, this will be a reference to the actual data stored - in this Series or Index. 
+ For NumPy dtypes, this will be a reference to the actual data stored + in this Series or Index. Modifying the result in place will modify + the data stored in the Series or Index (not that we recommend doing + that). - For extension types, this may involve copying data and coercing the - result to a NumPy type (possibly object), which may be expensive. + For extension types, ``to_numpy`` *may* require copying data and + coercing the result to a NumPy type (possibly object), + which may be expensive. This table lays out the different array types for each extension dtype within pandas. @@ -838,13 +846,14 @@ def to_numpy(self): category[T] ndarray[T] (same dtype as input) period ndarray[object] (Periods) interval ndarray[object] (Intervals) - IntegerNA IntegerArray[object] + IntegerNA ndarray[object] datetime64[ns, tz] datetime64[ns]? object? ================== ================================ See Also -------- - array : Get the actual data stored within. + Series.array : Get the actual data stored within. + Index.array : Get the actual data stored within. Examples -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3e4793c8f6b5e..d6d5f3076607f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -718,7 +718,23 @@ def dtype_str(self): @property def values(self): - """ return the underlying data as an ndarray """ + """ + Return an array representing the data in the Index. + + .. warning:: + + We recommend you use :attr:`Index.array` or + :meth:`Index.to_numpy` instead of ``.values``. 
+ + Returns + ------- + array: Union[np.ndarray, ExtensionArray] + + See Also + -------- + Index.array + Index.to_numpy + """ return self._data.view(np.ndarray) @property From c19c9bb4043f84d2f2b07039ab14d56d998355fb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Nov 2018 14:33:23 -0600 Subject: [PATCH 07/17] Added DataFrame.to_numpy --- pandas/core/frame.py | 37 +++++++++++++++++++++++++++++++++ pandas/tests/frame/test_api.py | 6 ++++++ pandas/tests/series/test_api.py | 3 +++ 3 files changed, 46 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a8ad3f98f8e0..5682ed3f2913e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1117,6 +1117,43 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None): return cls(data, index=index, columns=columns, dtype=dtype) + def to_numpy(self): + """ + Convert the DataFrame to a NumPy array. + + The dtype of the returned array will be the common NumPy + dtype of all types in the DataFrame. This may require copying + data and coercing values, which may be expensive. + + Returns + ------- + array : numpy.ndarray + + See Also + -------- + Series.to_numpy + Series.array + + Examples + -------- + >>> pd.DataFrame({"A": [1, 2], "B": [3, 4]}).to_numpy() + + With heterogeneous data, the lowest common type will have to + be used. + + >>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}) + >>> df.to_numpy() + + When numeric and non-numeric types are mixed, the output array will + have object dtype. + + >>> df['C'] = pd.date_range('2000', periods=2) + >>> df.to_numpy() + array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], + [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) + """ + return self.values + def to_dict(self, orient='dict', into=dict): """ Convert the DataFrame to a dictionary. 
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 295a603850984..45fb36e34eb36 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -319,6 +319,12 @@ def test_values(self, float_frame, float_string_frame): expected = float_frame.reindex(columns=['A', 'B']).values assert_almost_equal(arr, expected) + def test_to_numpy(): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]}) + expected = np.array([[1, 2], [3, 4.5]]) + result = df.to_numpy() + tm.assert_numpy_array_equal(result, expected) + def test_transpose(self, float_frame): frame = float_frame dft = frame.T diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index f944d6f8c9d08..c225331a32837 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -305,6 +305,9 @@ def test_keys(self): def test_values(self): tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) + def test_to_numpy(self): + pass + def test_iteritems(self): for idx, val in compat.iteritems(self.series): assert val == self.series[idx] From 86197909f85bff1710cf4fc099add8e0f9f170e0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 17 Nov 2018 14:39:18 -0600 Subject: [PATCH 08/17] clean --- pandas/tests/frame/test_api.py | 4 ++-- pandas/tests/series/test_api.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 45fb36e34eb36..074745429af0d 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -319,9 +319,9 @@ def test_values(self, float_frame, float_string_frame): expected = float_frame.reindex(columns=['A', 'B']).values assert_almost_equal(arr, expected) - def test_to_numpy(): + def test_to_numpy(self): df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]}) - expected = np.array([[1, 2], [3, 4.5]]) + expected = np.array([[1, 3], [2, 4.5]]) result = df.to_numpy() tm.assert_numpy_array_equal(result, expected) 
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index c225331a32837..f944d6f8c9d08 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -305,9 +305,6 @@ def test_keys(self): def test_values(self): tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) - def test_to_numpy(self): - pass - def test_iteritems(self): for idx, val in compat.iteritems(self.series): assert val == self.series[idx] From 95f19bc41d69ff74f13e24b2da88f8aa7887d62a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Nov 2018 06:53:36 -0600 Subject: [PATCH 09/17] doc update --- doc/source/10min.rst | 30 ++++++++++++++++++++------- doc/source/basics.rst | 36 ++++++++++++++++++++++----------- doc/source/dsintro.rst | 36 ++++----------------------------- doc/source/whatsnew/v0.24.0.rst | 2 ++ pandas/core/base.py | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 4 ++++ pandas/core/indexes/base.py | 5 +++-- pandas/core/series.py | 9 +++++++-- 9 files changed, 69 insertions(+), 57 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index f074ac6011475..bbd1e89af202d 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -121,17 +121,33 @@ Display the index, columns: df.index df.columns -:attr:`DataFrame.values` gives a NumPy representation of the underlying data. -However, this can be an expensive operation when your :class:`DataFrame` has -columns with different data types. **NumPy arrays have a single dtype for -the entire array, so accessing ``df.values`` may have to coerce data**. We -recommend using ``df.values`` only when you know that your data has a single -data type. +:meth:`DataFrame.to_numpy` gives a NumPy representation of the underlying data. 
+Note that this can be an expensive operation when your :class:`DataFrame` has +columns with different data types, which comes down to a fundamental difference +between pandas and NumPy: **NumPy arrays have one dtype for the entire array, +while pandas DataFrames have one dtype per column**. When you call +:meth:`DataFrame.to_numpy`, pandas will find the NumPy dtype that can hold *all* +of the dtypes in the DataFrame. This may end up being ``object``, which requires +casting every value to a Python object. + +For ``df``, our :class:`DataFrame` of all floating-point values, +:meth:`DataFrame.to_numpy` is fast and doesn't require copying data. .. ipython:: python - df.values + df.to_numpy() + +For ``df2``, the :class:`DataFrame` with multiple dtypes, +:meth:`DataFrame.to_numpy` is relatively expensive. + +.. ipython:: python + + df2.to_numpy() + +.. note:: + :meth:`DataFrame.to_numpy` does *not* include the index or column + labels in the output. :func:`~DataFrame.describe` shows a quick statistic summary of your data: diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 2ea7f4646b4a2..a3a35ee6cb4b7 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -69,7 +69,7 @@ thought of as containers for arrays, which hold the actual data and do the actual computation. For many types, the underlying array is a :class:`numpy.ndarray`. However, pandas and 3rd party libraries may *extend* NumPy's type system to add support for custom arrays -(see :ref:`dsintro.data_types`). +(see :ref:`basics.dtypes`). To get the actual data inside a :class:`Index` or :class:`Series`, use the **array** property @@ -1951,17 +1951,29 @@ dtypes ------ For the most part, pandas uses NumPy arrays and dtypes for Series or individual -columns of a DataFrame. The main types allowed in pandas objects are ``float``, -``int``, ``bool``, and ``datetime64[ns]`` (note that NumPy does not support -timezone-aware datetimes).
- -In addition to NumPy's types, pandas :ref:`extends ` -NumPy's type-system for a few cases. - -* :ref:`Categorical ` -* :ref:`Datetime with Timezone ` -* :ref:`Period ` -* :ref:`Interval ` +columns of a DataFrame. NumPy provides support for ``float``, +``int``, ``bool``, ``timedelta64[ns]`` and ``datetime64[ns]`` (note that NumPy +does not support timezone-aware datetimes). + +Pandas and third-party libraries *extend* NumPy's type system in a few places. +This section describes the extensions pandas has made internally. +See :ref:`extending.extension-types` for how to write your own extension that +works with pandas. See :ref:`ecosystem.extensions` for a list of third-party +libraries that have implemented an extension. + +The following table lists all of pandas extension types. See the respective +documentation sections for more on each type. + +=================== ========================= ================== ============================= ============================= +Kind of Data Data Type Scalar Array Documentation +=================== ========================= ================== ============================= ============================= +tz-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :class:`arrays.DatetimeArray` :ref:`timeseries.timezone` +Categorical :class:`CategoricalDtype` (none) :class:`Categorical` :ref:`categorical` +period (time spans) :class:`PeriodDtype` :class:`Period` :class:`arrays.PeriodArray` :ref:`timeseries.periods` +sparse :class:`SparseDtype` (none) :class:`arrays.SparseArray` :ref:`sparse` +intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex` +nullable integer :class:`Int64Dtype`, ... (none) :class:`arrays.IntegerArray` :ref:`integer_na` +=================== ========================= ================== ============================= ============================= Pandas uses the ``object`` dtype for storing strings.
diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 3f784f9e90371..0bf54c5e7e6d4 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -142,7 +142,7 @@ However, operations such as slicing will also slice the index. We will address array-based indexing like ``s[[4, 3, 1]]`` in :ref:`section `. -Like a NumPy array, a pandas Series as a :attr:`Series.dtype`. +Like a NumPy array, a pandas Series has a :attr:`~Series.dtype`. .. ipython:: python @@ -151,7 +151,8 @@ Like a NumPy array, a pandas Series as a :attr:`Series.dtype`. This is often a NumPy dtype. However, pandas and 3rd-party libraries extend NumPy's type system in a few places, in which case the dtype would be a :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within -pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`dsintro.data_type` for more. +pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`basics.dtypes` +for more. If you need the actual array backing a ``Series``, use :attr:`Series.array`. @@ -160,7 +161,7 @@ If you need the actual array backing a ``Series``, use :attr:`Series.array`. s.array Again, this is often a NumPy array, but may instead be a -:class:`~pandas.api.extensions.ExtensionArray`. See :ref:`dsintro.data_type` for more. +:class:`~pandas.api.extensions.ExtensionArray`. See :ref:`basics.dtypes` for more. Accessing the array can be useful when you need to do some operation without the index (to disable :ref:`automatic alignment `, for example). @@ -859,35 +860,6 @@ completion mechanism so they can be tab-completed: In [5]: df.fo df.foo1 df.foo2 -.. _dsintro.data_type: - -Data Types ----------- - -Pandas type system is mostly built on top of `NumPy's `__. -NumPy provides the basic arrays and data types for numeric -string, *tz-naive* datetime, and others types of data. - -Pandas and third-party libraries *extend* NumPy's type system in a few places. -This section describes the extensions pandas has made internally. 
-See :ref:`extending.extension-types` for how to write your own extension that -works with pandas. See :ref:`ecosystem.extensions` for a list of third-party -libraries that have implemented an extension. - -The following table lists all of pandas extension types. See the respective -documentation sections for more on each type. - -=================== ========================= ================== ============================= ============================= -Kind of Data Data Type Scalar Array Documentation -=================== ========================= ================== ============================= ============================= -tz-aware datetime :class:`DatetimeArray` :class:`Timestamp` :class:`arrays.DatetimeArray` :ref:`timeseries.timezone` -Categorical :class:`CategoricalDtype` (none) :class:`Categorical` :ref:`categorical` -period (time spans) :class:`PeriodDtype` :class:`Period` :class:`arrays.PeriodArray` :ref:`timeseries.periods` -sparse :class:`SparseDtype` (none) :class:`arrays.SparseArray` :ref:`sparse` -intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex` -nullable integer :clsas:`Int64Dtype`, ... (none) :class:`arrays.IntegerArray` :ref:`integer_na` -=================== ========================= ================== ============================= ============================= - .. _basics.panel: Panel diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index af028222a1137..18728207647ab 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -67,6 +67,8 @@ as ``.values``). ser.array ser.to_numpy() +See :ref:`basics.dtypes` and :ref:`dsintro.attrs` for more. + .. 
_whatsnew_0240.enhancements.extension_array_operators: ``ExtensionArray`` operator support diff --git a/pandas/core/base.py b/pandas/core/base.py index a56610fddbbcf..146f99afba52d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -778,7 +778,7 @@ def array(self): Union[ndarray, ExtensionArray] This is the actual array stored within this object. This differs from ``.values`` which may require converting the data - to a different form. We recommend using : + to a different form. Notes ----- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49edf2acb1f7b..8e682f0391b62 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1144,7 +1144,7 @@ def to_numpy(self): >>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}) >>> df.to_numpy() - When numeric and non-numeric types, the output array will + For a mix of numeric and non-numeric types, the output array will have object dtype. >>> df['C'] = pd.date_range('2000', periods=2) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 97ea4fb96ce95..c82170ad04632 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4928,6 +4928,10 @@ def values(self): """ Return a Numpy representation of the DataFrame. + .. warning:: + + We recommend using :meth:`DataFrame.to_numpy` instead. + Only the values in the DataFrame will be returned, the axes labels will be removed. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 83eb25480d6b0..14dd3a74edf1f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -724,8 +724,9 @@ def values(self): .. warning:: - We recommend you use :attr:`Index.array` or - :meth:`Index.to_numpy` instead of ``.values``. + We recommend using :attr:`Index.array` or + :meth:`Index.to_numpy`, depending on whether you need + a reference to the underlying data or a NumPy array. 
Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 621db48b1ad42..1261a59480e4d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -410,8 +410,13 @@ def ftypes(self): @property def values(self): """ - Return Series as ndarray or ndarray-like - depending on the dtype + Return Series as ndarray or ndarray-like depending on the dtype. + + .. warning:: + + We recommend using :attr:`Series.array` or + :meth:`Series.to_numpy`, depending on whether you need + a reference to the underlying data or a NumPy array. Returns ------- From 5a905abb3633aaaa4c02e536cbafc7d78be0f4e9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Nov 2018 07:04:39 -0600 Subject: [PATCH 10/17] update --- doc/source/advanced.rst | 2 +- doc/source/basics.rst | 6 +++--- doc/source/extending.rst | 2 +- doc/source/indexing.rst | 2 +- doc/source/reshaping.rst | 4 ++-- doc/source/timeseries.rst | 10 +++++----- pandas/core/base.py | 6 ++++-- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 563c869eff54d..6c5d77fe70566 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -188,7 +188,7 @@ highly performant. If you want to see only the used levels, you can use the .. ipython:: python - df[['foo','qux']].columns.values + df[['foo','qux']].columns.to_numpy() # for a specific level df[['foo','qux']].columns.get_level_values(0) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index a3a35ee6cb4b7..8963a387fe661 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -81,11 +81,11 @@ the **array** property Getting the "raw data" inside a :class:`DataFrame` is possibly a bit more complex. When your ``DataFrame`` only has a single data type for all the -columns, :atr:`DataFrame.values` will return the underlying data: +columns, :atr:`DataFrame.to_numpy` will return the underlying data: .. 
ipython:: python - df.values + df.to_numpy() If a DataFrame or Panel contains homogeneously-typed data, the ndarray can actually be modified in-place, and the changes will be reflected in the data @@ -2076,7 +2076,7 @@ force some *upcasting*. .. ipython:: python - df3.values.dtype + df3.to_numpy().dtype astype ~~~~~~ diff --git a/doc/source/extending.rst b/doc/source/extending.rst index 6c47d0ae8bd84..7046981a3a364 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -186,7 +186,7 @@ Instead, you should detect these cases and return ``NotImplemented``. When pandas encounters an operation like ``op(Series, ExtensionArray)``, pandas will -1. unbox the array from the ``Series`` (roughly ``Series.values``) +1. unbox the array from the ``Series`` (``Series.array``) 2. call ``result = op(values, ExtensionArray)`` 3. re-box the result in a ``Series`` diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 5740ab5fa6921..dc0c6dd027b3c 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -190,7 +190,7 @@ columns. .. 
ipython:: python - df.loc[:,['B', 'A']] = df[['A', 'B']].values + df.loc[:,['B', 'A']] = df[['A', 'B']].to_numpy() df[['A', 'B']] diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index ff867a2ddfe6d..b21ea9defcdb6 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -26,7 +26,7 @@ Reshaping by pivoting DataFrame objects In [2]: def unpivot(frame): ...: N, K = frame.shape - ...: data = {'value' : frame.values.ravel('F'), + ...: data = {'value' : frame.to_numpy().ravel('F'), ...: 'variable' : np.asarray(frame.columns).repeat(N), ...: 'date' : np.tile(np.asarray(frame.index), K)} ...: columns = ['date', 'variable', 'value'] @@ -53,7 +53,7 @@ For the curious here is how the above ``DataFrame`` was created: def unpivot(frame): N, K = frame.shape - data = {'value': frame.values.ravel('F'), + data = {'value': frame.to_numpy().ravel('F'), 'variable': np.asarray(frame.columns).repeat(N), 'date': np.tile(np.asarray(frame.index), K)} return pd.DataFrame(data, columns=['date', 'variable', 'value']) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index cc377f45c4b8d..4fa1cb8be9234 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2436,22 +2436,22 @@ a convert on an aware stamp. .. note:: - Using the ``.values`` accessor on a ``Series``, returns an NumPy array of the data. + Using :meth:`Series.to_numpy` on a ``Series``, returns a NumPy array of the data. These values are converted to UTC, as NumPy does not currently support timezones (even though it is *printing* in the local timezone!). .. ipython:: python - s_naive.values - s_aware.values + s_naive.to_numpy() + s_aware.to_numpy() Further note that once converted to a NumPy array these would lose the tz tenor. .. ipython:: python - pd.Series(s_aware.values) + pd.Series(s_aware.to_numpy()) However, these can be easily converted: .. 
ipython:: python - pd.Series(s_aware.values).dt.tz_localize('UTC').dt.tz_convert('US/Eastern') + pd.Series(s_aware.to_numpy()).dt.tz_localize('UTC').dt.tz_convert('US/Eastern') diff --git a/pandas/core/base.py b/pandas/core/base.py index 146f99afba52d..ba1ee902d8089 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -15,7 +15,7 @@ is_list_like, is_scalar, is_extension_type, - is_extension_array_dtype) + is_extension_array_dtype, is_datetime64tz_dtype) from pandas.util._validators import validate_bool_kwarg from pandas.errors import AbstractMethodError @@ -861,7 +861,9 @@ def to_numpy(self): >>> ser.to_numpy() array(['a', 'b', 'a'], dtype=object) """ - if is_extension_array_dtype(self.dtype): + if (is_extension_array_dtype(self.dtype) or + is_datetime64tz_dtype(self.dtype)): + # TODO(DatetimeArray): remove the second clause. return np.asarray(self._values) return self._values From 1e6eed4298b8c31c39b72248c36f8ca05c1ae4e2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Nov 2018 14:15:00 -0600 Subject: [PATCH 11/17] fixed doctest --- pandas/core/frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 280221190899f..b88908d663851 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1137,14 +1137,16 @@ def to_numpy(self): Examples -------- >>> pd.DataFrame({"A": [1, 2], "B": [3, 4]}).to_numpy() + array([[1, 3], + [2, 4]]) With heterogenous data, the lowest common type will have to be used. >>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}) >>> df.to_numpy() - array([[1, 3], - [2, 4]]) + array([[1. , 3. ], + [2. , 4.5]]) For a mix of numeric and non-numeric types, the output array will have object dtype. 
From a7a13a0f6bcffb9c1f11528c3b80b46809a9b5f9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Nov 2018 05:34:39 -0600 Subject: [PATCH 12/17] Fixed array error in docs --- doc/source/basics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index f0e94aa8ed9e5..1bd6d3195a9be 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -853,7 +853,7 @@ Series operation on each column or row: tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], index=pd.date_range('1/1/2000', periods=10)) - tsdf.array[3:7] = np.nan + tsdf.iloc[3:7] = np.nan .. ipython:: python From c0a63c0fc4a75addb3a4215de997698dedfabdc1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Nov 2018 05:52:52 -0600 Subject: [PATCH 13/17] update docs --- pandas/core/base.py | 28 +++++++++++++++++++--------- pandas/core/frame.py | 2 ++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 8dc5fde52ab77..d8db20db33581 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -782,12 +782,11 @@ def array(self): # type: () -> Union[np.ndarray, ExtensionArray] """The actual Array backing this Series or Index. - This differs from ``.values``, which may require converting or - copying data. + .. versionadded:: 0.24.0 Returns ------- - Union[ndarray, ExtensionArray] + array : numpy.ndarray or ExtensionArray This is the actual array stored within this object. This differs from ``.values`` which may require converting the data to a different form. @@ -813,6 +812,14 @@ def array(self): For all remaining dtypes ``.array`` will be the :class:`numpy.ndarray` stored within. + .. note:: + + ``.array`` will always return the underlying object backing the + Series or Index. 
If a future version of pandas adds a specialized + extension type for a data then the return type of ``.array`` for + that data type will change from an object-dtype ndarray to the + new ExtensionArray. + See Also -------- Index.to_numpy : Similar method that always returns a NumPy array. @@ -830,6 +837,8 @@ def array(self): def to_numpy(self): """A NumPy array representing the values in this Series or Index. + .. versionadded:: 0.24.0 + The returned array will be the same up to equality (values equal in `self` will be equal in the returned array; likewise for values that are not equal). @@ -845,12 +854,12 @@ def to_numpy(self): the data stored in the Series or Index (not that we recommend doing that). - For extension types, ``to_numpy`` *may* require copying data and - coercing the result to a NumPy type (possibly object), - which may be expensive. + For extension types, ``to_numpy()`` *may* require copying data and + coercing the result to a NumPy type (possibly object), which may be + expensive. - This table lays out the different array types for each extension - dtype within pandas. + This table lays out the different dtypes and return types of + ``to_numpy()`` for various dtypes within pandas. ================== ================================ dtype array type @@ -859,13 +868,14 @@ def to_numpy(self): period ndarray[object] (Periods) interval ndarray[object] (Intervals) IntegerNA ndarray[object] - datetime64[ns, tz] datetime64[ns]? object? + datetime64[ns, tz] datetime64[ns] ================== ================================ See Also -------- Series.array : Get the actual data stored within. Index.array : Get the actual data stored within. + DataFrame.to_numpy : Similar method for DataFrame. Examples -------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4298a1956de0..74dc927963424 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1131,6 +1131,8 @@ def to_numpy(self): """ Convert the DataFrame to a NumPy array. + .. 
versionadded:: 0.24.0 + The dtype of the returned array will be the common NumPy dtype of all types in the DataFrame. This may require copying data and coercing values, which may be expensive. From 661b9ebc1b92dbcd1c561e3e064f8ec2b9c9a1ad Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Nov 2018 09:11:32 -0600 Subject: [PATCH 14/17] Fixup for feedback --- doc/source/basics.rst | 34 ++++++++++++++++++++++-- doc/source/whatsnew/v0.24.0.rst | 3 +++ pandas/core/base.py | 47 +++++++++++++++++++-------------- pandas/core/frame.py | 9 ++++--- pandas/core/generic.py | 1 + pandas/core/indexes/base.py | 2 +- pandas/tests/test_base.py | 4 +-- 7 files changed, 71 insertions(+), 29 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 1bd6d3195a9be..25e2c8cd1ff9a 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -80,6 +80,21 @@ the **array** property s.array s.index.array +Depending on the data type (see :ref:`basics.dtypes`), :attr:`~Series.array` +will be either a NumPy array or an :ref:`ExtensionArray `. +If you know you need a NumPy array, use :meth:`~Series.to_numpy` +or :func:`numpy.asarray`. + +.. ipython:: python + + s.to_numpy() + np.asarray(s) + +For Series and Indexes backed by NumPy arrays (like we have here), this will +be the same as :attr:`~Series.array`. When the Series or Index is backed by +a :class:`~pandas.api.extensions.ExtensionArray`, :meth:`~Series.to_numpy` +may involve copying data and coercing values. + Getting the "raw data" inside a :class:`DataFrame` is possibly a bit more complex. When your ``DataFrame`` only has a single data type for all the columns, :atr:`DataFrame.to_numpy` will return the underlying data: @@ -101,6 +116,21 @@ unlike the axis labels, cannot be assigned to. strings are involved, the result will be of object dtype. If there are only floats and integers, the resulting array will be of float dtype.
+In the past, pandas recommended :attr:`Series.values` or :attr:`DataFrame.values` +for extracting the data from a Series or DataFrame. You'll still find references +to these in old code bases and online. Going forward, we recommend avoiding +``.values`` and using ``.array`` or ``.to_numpy()``. ``.values`` has the following +drawbacks: + +1. When your Series contains an :ref:`extension type `, it's + unclear whether :attr:`Series.values` returns a NumPy array or the extension array. + :attr:`Series.array` will always return the actual array backing the Series, + while :meth:`Series.to_numpy` will always return a NumPy array. +2. When your DataFrame contains a mixture of data types, :attr:`DataFrame.values` may + involve copying data and coercing values to a common dtype, a relatively expensive + operation. :meth:`DataFrame.to_numpy`, being a method, makes it clearer that the + returned NumPy array may not be a view on the same data in the DataFrame. + .. _basics.accelerate: Accelerated operations @@ -555,7 +585,7 @@ will exclude NAs on Series input by default: .. ipython:: python np.mean(df['one']) - np.mean(df['one'].array) + np.mean(df['one'].to_numpy()) :meth:`Series.nunique` will return the number of unique non-NA values in a Series: @@ -2009,7 +2039,7 @@ from the current type (e.g. ``int`` to ``float``). df3 df3.dtypes -The ``values`` attribute on a DataFrame return the *lower-common-denominator* of the dtypes, meaning +:meth:`DataFrame.to_numpy` will return the *lower-common-denominator* of the dtypes, meaning the dtype that can accommodate **ALL** of the types in the resulting homogeneous dtyped NumPy array. This can force some *upcasting*. diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index ea694595e287b..cbd334b5f8849 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -67,6 +67,9 @@ as ``.values``). 
ser.array ser.to_numpy() +We haven't removed or deprecated :attr:`Series.values` or :attr:`DataFrame.values`, but we +recommend using ``.array`` or ``.to_numpy()`` instead. + See :ref:`basics.dtypes` and :ref:`dsintro.attrs` for more. .. _whatsnew_0240.enhancements.extension_array_operators: diff --git a/pandas/core/base.py b/pandas/core/base.py index d8db20db33581..86de25444cf4c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -780,7 +780,8 @@ def base(self): @property def array(self): # type: () -> Union[np.ndarray, ExtensionArray] - """The actual Array backing this Series or Index. + """ + The actual Array backing this Series or Index. .. versionadded:: 0.24.0 @@ -791,6 +792,11 @@ def array(self): from ``.values`` which may require converting the data to a different form. + See Also + -------- + Index.to_numpy : Similar method that always returns a NumPy array. + Series.to_numpy : Similar method that always returns a NumPy array. + Notes ----- This table lays out the different array types for each extension @@ -803,28 +809,24 @@ def array(self): period PeriodArray interval IntervalArray IntegerNA IntegerArray - datetime64[ns, tz] datetime64[ns]? DatetimeArray + datetime64[ns, tz] DatetimeArray ================== ============================= For any 3rd-party extension types, the array type will be an ExtensionArray. For all remaining dtypes ``.array`` will be the :class:`numpy.ndarray` - stored within. + stored within. If you absolutely need a NumPy array (possibly with + copying / coercing data), then use :meth:`Series.to_numpy` instead. .. note:: ``.array`` will always return the underlying object backing the Series or Index.
If a future version of pandas adds a specialized - extension type for a data then the return type of ``.array`` for - that data type will change from an object-dtype ndarray to the + extension type for a data type, then the return type of ``.array`` + for that data type will change from an object-dtype ndarray to the new ExtensionArray. - See Also - -------- - Index.to_numpy : Similar method that always returns a NumPy array. - Series.to_numpy : Similar method that always returns a NumPy array. - Examples -------- >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) @@ -835,18 +837,28 @@ def array(self): return self._values def to_numpy(self): - """A NumPy array representing the values in this Series or Index. + """ + A NumPy ndarray representing the values in this Series or Index. .. versionadded:: 0.24.0 The returned array will be the same up to equality (values equal in `self` will be equal in the returned array; likewise for values - that are not equal). + that are not equal). When `self` contains an ExtensionArray, the + dtype may be different. For example, for a category-dtype Series, + ``to_numpy()`` will return a NumPy array and the categorical dtype + will be lost. Returns ------- numpy.ndarray + See Also + -------- + Series.array : Get the actual data stored within. + Index.array : Get the actual data stored within. + DataFrame.to_numpy : Similar method for DataFrame. + Notes ----- For NumPy dtypes, this will be a reference to the actual data stored @@ -856,7 +868,8 @@ def to_numpy(self): For extension types, ``to_numpy()`` *may* require copying data and coercing the result to a NumPy type (possibly object), which may be - expensive. + expensive. When you need a no-copy reference to the underlying data, + :attr:`Series.array` should be used instead. This table lays out the different dtypes and return types of ``to_numpy()`` for various dtypes within pandas. 
@@ -868,15 +881,9 @@ def to_numpy(self): period ndarray[object] (Periods) interval ndarray[object] (Intervals) IntegerNA ndarray[object] - datetime64[ns, tz] datetime64[ns] + datetime64[ns, tz] ndarray[object] (Timestamps) ================== ================================ - See Also - -------- - Series.array : Get the actual data stored within. - Index.array : Get the actual data stored within. - DataFrame.to_numpy : Similar method for DataFrame. - Examples -------- >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74dc927963424..0294eeef0da75 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1134,8 +1134,10 @@ def to_numpy(self): .. versionadded:: 0.24.0 The dtype of the returned array will be the common NumPy - dtype of all types in the DataFrame. This may require copying - data and coercing values, which may be expensive. + dtype of all types in the DataFrame. For example, + if the dtypes are ``float16`` and ``float32``, the resulting + dtype will be ``float32``. This may require copying data and + coercing values, which may be expensive. Returns ------- @@ -1143,8 +1145,7 @@ def to_numpy(self): See Also -------- - Series.to_nummpy - Series.array + Series.to_numpy : Similar method for Series. Examples -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b34850db807b5..c190e44dbbe29 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5060,6 +5060,7 @@ def values(self): See Also -------- + DataFrame.to_numpy : Recommended alternative to this method. pandas.DataFrame.index : Retrieve the index labels. pandas.DataFrame.columns : Retrieving the column names.
""" diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 29addf540841b..531b6c7794807 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -722,7 +722,7 @@ def values(self): Returns ------- - array: Union[np.ndarray, ExtensionArray] + array: numpy.ndarray or ExtensionArray See Also -------- diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 10ebd7ecb3596..f74def092bf62 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1304,7 +1304,7 @@ def test_array_multiindex_raises(): (pd.core.arrays.period_array(['2000', '2001'], freq='D'), np.array([pd.Period('2000', freq="D"), pd.Period('2001', freq='D')])), (pd.core.arrays.integer_array([0, np.nan]), - np.array([1, np.nan], dtype=object)), + np.array([0, np.nan], dtype=object)), (pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]), np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object)), (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), @@ -1314,7 +1314,7 @@ def test_array_multiindex_raises(): def test_to_numpy(array, expected, box): thing = box(array) - if array.dtype.name in ('Int64', 'Sparse[int64, 0]'): + if array.dtype.name in ('Int64', 'Sparse[int64, 0]') and box is pd.Index: pytest.skip("No index type for {}".format(array.dtype)) result = thing.to_numpy() From 566a0278d58ffa3c5a1b616ea7b91bea39daeaf5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Nov 2018 09:20:09 -0600 Subject: [PATCH 15/17] skip only on index box --- pandas/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index bb325298af045..47fafe2a900b4 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1281,7 +1281,7 @@ def test_ndarray_values(array, expected): ]) @pytest.mark.parametrize('box', [pd.Series, pd.Index]) def test_array(array, attr, box): - if array.dtype.name in ('Int64', 'Sparse[int64, 0]'): + if 
array.dtype.name in ('Int64', 'Sparse[int64, 0]') and box is pd.Index: pytest.skip("No index type for {}".format(array.dtype)) result = box(array, copy=False).array From 062c49f3cfc3bc4ff87549f471434b0f31587ec1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Nov 2018 09:27:07 -0600 Subject: [PATCH 16/17] Series.values --- pandas/core/indexes/base.py | 4 ++-- pandas/core/series.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 531b6c7794807..8954a27e8feca 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -726,8 +726,8 @@ def values(self): See Also -------- - Index.array - Index.to_numpy + Index.array : Reference to the underlying data. + Index.to_numpy : A NumPy array representing the underlying data. """ return self._data.view(np.ndarray) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4b8274a9e8333..91e526a3f5fdf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -426,10 +426,21 @@ def values(self): """ Return Series as ndarray or ndarray-like depending on the dtype. + .. warning:: + + We recommend using :attr:`Series.array` or + :meth:`Series.to_numpy`, depending on whether you need + a reference to the underlying data or a NumPy array. + Returns ------- arr : numpy.ndarray or ndarray-like + See Also + -------- + Series.array : Reference to the underlying data. + Series.to_numpy : A NumPy array representing the underlying data.
+ Examples -------- >>> pd.Series([1, 2, 3]).values From e805c26275ce973b9da85c6b2e70909019b79c5b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Nov 2018 16:32:15 -0600 Subject: [PATCH 17/17] remove stale todo --- pandas/core/indexes/multi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f52bb4691b6a1..567834b04c1ca 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -432,7 +432,6 @@ def levels(self): @property def _values(self): - # TODO: remove # We override here, since our parent uses _data, which we dont' use. return self.values