From 670e76892cd763f446384869fb2794263542e31f Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Tue, 11 Sep 2018 20:38:28 -0400 Subject: [PATCH 1/4] DOC: Fix DataFrame.to_xarray doctests and allow the CI to run it. --- ci/doctests.sh | 2 +- pandas/core/generic.py | 72 +++++++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index 2af5dbd26aeb1..505fc44749753 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transform -transpose -values -xs" + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transform -transpose -values -xs" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e5da21f573b0..dedfb49288b4d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2492,17 +2492,25 @@ def to_xarray(self): """ Return an xarray object from the pandas object. + Return the xarray equivalent of the pandas object. `xarray + `__ is a + Python package that allows to handle N-dimensional data. + Returns ------- a DataArray for a Series a Dataset for a DataFrame a DataArray for higher dims + See also + -------- + DataFrame.to_csv : Write out to a csv file. + Examples -------- >>> df = pd.DataFrame({'A' : [1, 1, 2], - 'B' : ['foo', 'bar', 'foo'], - 'C' : np.arange(4.,7)}) + ... 'B' : ['foo', 'bar', 'foo'], + ... 'C' : np.arange(4.,7)}) >>> df A B C 0 1 foo 4.0 @@ -2520,9 +2528,9 @@ def to_xarray(self): C (index) float64 4.0 5.0 6.0 >>> df = pd.DataFrame({'A' : [1, 1, 2], - 'B' : ['foo', 'bar', 'foo'], - 'C' : np.arange(4.,7)} - ).set_index(['B','A']) + ... 'B' : ['foo', 'bar', 'foo'], + ... 'C' : np.arange(4.,7)} + ... ).set_index(['B','A']) >>> df C B A @@ -2539,35 +2547,33 @@ def to_xarray(self): Data variables: C (B, A) float64 5.0 nan 4.0 6.0 - >>> p = pd.Panel(np.arange(24).reshape(4,3,2), - items=list('ABCD'), - major_axis=pd.date_range('20130101', periods=3), - minor_axis=['first', 'second']) - >>> p - - Dimensions: 4 (items) x 3 (major_axis) x 2 (minor_axis) - Items axis: A to D - Major_axis axis: 2013-01-01 00:00:00 to 2013-01-03 00:00:00 - Minor_axis axis: first to second - - >>> p.to_xarray() - - array([[[ 0, 1], - [ 2, 3], - [ 4, 5]], - [[ 6, 7], - [ 8, 9], - [10, 11]], - [[12, 13], - [14, 15], - [16, 17]], - [[18, 19], - [20, 21], - [22, 23]]]) + >>> index = pd.MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], + ... ['one', 'two']], + ... labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]], + ... names=['first', 'second']) + + >>> s = pd.Series(np.arange(8), index=index) + >>> s + first second + bar one 0 + two 1 + baz one 2 + two 3 + foo one 4 + two 5 + qux one 6 + two 7 + dtype: int64 + + >>> s.to_xarray() + + array([[0, 1], + [2, 3], + [4, 5], + [6, 7]]) Coordinates: - * items (items) object 'A' 'B' 'C' 'D' - * major_axis (major_axis) datetime64[ns] 2013-01-01 2013-01-02 2013-01-03 # noqa - * minor_axis (minor_axis) object 'first' 'second' + * first (first) object 'bar' 'baz' 'foo' 'qux' + * second (second) object 'one' 'two' Notes ----- From a7ecbb2387da844191d59ae4d942cf8d5f3324a8 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Tue, 11 Sep 2018 21:21:36 -0400 Subject: [PATCH 2/4] Remove extended summary from to_xarray docstring --- pandas/core/generic.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dedfb49288b4d..f3020cb0cff55 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2492,10 +2492,6 @@ def to_xarray(self): """ Return an xarray object from the pandas object. - Return the xarray equivalent of the pandas object. `xarray - `__ is a - Python package that allows to handle N-dimensional data. - Returns ------- a DataArray for a Series From ce5098aa8ae81a8d2494bb2a7fe762b13244a727 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Thu, 13 Sep 2018 16:30:34 -0400 Subject: [PATCH 3/4] Refactor the examples and the See Also section of to_xarray docstring --- pandas/core/generic.py | 109 ++++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 50 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f3020cb0cff55..a2a58973177a0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2494,81 +2494,90 @@ def to_xarray(self): Returns ------- - a DataArray for a Series - a Dataset for a DataFrame - a DataArray for higher dims + xarray.DataArray or xarray.Dataset + Data in the pandas structure converted to Dataset if the object is + a DataFrame, or a DataArray if the object is a Series. - See also + See Also -------- - DataFrame.to_csv : Write out to a csv file. + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. Examples -------- - >>> df = pd.DataFrame({'A' : [1, 1, 2], - ... 'B' : ['foo', 'bar', 'foo'], - ... 'C' : np.arange(4.,7)}) + >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), + ... ('parrot', 'bird', 24.0, 2), + ... ('lion', 'mammal', 80.5, 4), + ... ('monkey', 'mammal', np.nan, 4)], + ... columns=['name', 'class', 'max_speed', + ... 'num_legs'], + ... index=[0, 2, 3, 1]) >>> df - A B C - 0 1 foo 4.0 - 1 1 bar 5.0 - 2 2 foo 6.0 + name class max_speed num_legs + 0 falcon bird 389.0 2 + 2 parrot bird 24.0 2 + 3 lion mammal 80.5 4 + 1 monkey mammal NaN 4 >>> df.to_xarray() - Dimensions: (index: 3) + Dimensions: (index: 4) Coordinates: - * index (index) int64 0 1 2 + * index (index) int64 0 2 3 1 Data variables: - A (index) int64 1 1 2 - B (index) object 'foo' 'bar' 'foo' - C (index) float64 4.0 5.0 6.0 - - >>> df = pd.DataFrame({'A' : [1, 1, 2], - ... 'B' : ['foo', 'bar', 'foo'], - ... 'C' : np.arange(4.,7)} - ... ).set_index(['B','A']) - >>> df - C - B A - foo 1 4.0 - bar 1 5.0 - foo 2 6.0 - - >>> df.to_xarray() + name (index) object 'falcon' 'parrot' 'lion' 'monkey' + class (index) object 'bird' 'bird' 'mammal' 'mammal' + max_speed (index) float64 389.0 24.0 80.5 nan + num_legs (index) int64 2 2 4 4 + + >>> df_multiindex = df.set_index(['class', 'name']) + >>> df_multiindex + max_speed num_legs + class name + bird falcon 389.0 2 + parrot 24.0 2 + mammal lion 80.5 4 + monkey NaN 4 + + >>> df_multiindex.to_xarray() - Dimensions: (A: 2, B: 2) + Dimensions: (class: 2, name: 4) Coordinates: - * B (B) object 'bar' 'foo' - * A (A) int64 1 2 + * class (class) object 'bird' 'mammal' + * name (name) object 'falcon' 'lion' 'monkey' 'parrot' Data variables: - C (B, A) float64 5.0 nan 4.0 6.0 + max_speed (class, name) float64 389.0 nan nan 24.0 nan 80.5 nan nan + num_legs (class, name) float64 2.0 nan nan 2.0 nan 4.0 4.0 nan - >>> index = pd.MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], + >>> index = pd.MultiIndex(levels=[[pd.to_datetime("2018-01-01"), + ... pd.to_datetime("2015-05-23"), pd.to_datetime("2015-06-06"), + ... pd.to_datetime("2011-02-13"), pd.to_datetime("2014-07-06")], ... ['one', 'two']], ... labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]], ... names=['first', 'second']) >>> s = pd.Series(np.arange(8), index=index) >>> s - first second - bar one 0 - two 1 - baz one 2 - two 3 - foo one 4 - two 5 - qux one 6 - two 7 + first second + 2018-01-01 one 0 + two 1 + 2015-05-23 one 2 + two 3 + 2015-06-06 one 4 + two 5 + 2011-02-13 one 6 + two 7 dtype: int64 >>> s.to_xarray() - - array([[0, 1], - [2, 3], - [4, 5], - [6, 7]]) + + array([[ 0., 1.], + [ 2., 3.], + [ 4., 5.], + [ 6., 7.], + [nan, nan]]) Coordinates: - * first (first) object 'bar' 'baz' 'foo' 'qux' + * first (first) datetime64[ns] 2018-01-01 2015-05-23 2015-06-06 ... * second (second) object 'one' 'two' Notes From 08561d2b0890e8d4e33de779e88412599a695457 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Fri, 14 Sep 2018 16:39:38 -0400 Subject: [PATCH 4/4] Simplify to_xarray doctests --- pandas/core/generic.py | 79 ++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a2a58973177a0..b2cc73948c092 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2510,75 +2510,54 @@ def to_xarray(self): ... ('lion', 'mammal', 80.5, 4), ... ('monkey', 'mammal', np.nan, 4)], ... columns=['name', 'class', 'max_speed', - ... 'num_legs'], - ... index=[0, 2, 3, 1]) + ... 'num_legs']) >>> df - name class max_speed num_legs + name class max_speed num_legs 0 falcon bird 389.0 2 - 2 parrot bird 24.0 2 - 3 lion mammal 80.5 4 - 1 monkey mammal NaN 4 + 1 parrot bird 24.0 2 + 2 lion mammal 80.5 4 + 3 monkey mammal NaN 4 >>> df.to_xarray() Dimensions: (index: 4) Coordinates: - * index (index) int64 0 2 3 1 + * index (index) int64 0 1 2 3 Data variables: name (index) object 'falcon' 'parrot' 'lion' 'monkey' class (index) object 'bird' 'bird' 'mammal' 'mammal' max_speed (index) float64 389.0 24.0 80.5 nan num_legs (index) int64 2 2 4 4 - >>> df_multiindex = df.set_index(['class', 'name']) + >>> df['max_speed'].to_xarray() + + array([389. , 24. , 80.5, nan]) + Coordinates: + * index (index) int64 0 1 2 3 + + >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', + ... '2018-01-02', '2018-01-02']) + >>> df_multiindex = pd.DataFrame({'date': dates, + ... 'animal': ['falcon', 'parrot', 'falcon', + ... 'parrot'], + ... 'speed': [350, 18, 361, 15]}).set_index(['date', + ... 'animal']) >>> df_multiindex - max_speed num_legs - class name - bird falcon 389.0 2 - parrot 24.0 2 - mammal lion 80.5 4 - monkey NaN 4 + speed + date animal + 2018-01-01 falcon 350 + parrot 18 + 2018-01-02 falcon 361 + parrot 15 >>> df_multiindex.to_xarray() - Dimensions: (class: 2, name: 4) + Dimensions: (animal: 2, date: 2) Coordinates: - * class (class) object 'bird' 'mammal' - * name (name) object 'falcon' 'lion' 'monkey' 'parrot' + * date (date) datetime64[ns] 2018-01-01 2018-01-02 + * animal (animal) object 'falcon' 'parrot' Data variables: - max_speed (class, name) float64 389.0 nan nan 24.0 nan 80.5 nan nan - num_legs (class, name) float64 2.0 nan nan 2.0 nan 4.0 4.0 nan - - >>> index = pd.MultiIndex(levels=[[pd.to_datetime("2018-01-01"), - ... pd.to_datetime("2015-05-23"), pd.to_datetime("2015-06-06"), - ... pd.to_datetime("2011-02-13"), pd.to_datetime("2014-07-06")], - ... ['one', 'two']], - ... labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]], - ... names=['first', 'second']) - - >>> s = pd.Series(np.arange(8), index=index) - >>> s - first second - 2018-01-01 one 0 - two 1 - 2015-05-23 one 2 - two 3 - 2015-06-06 one 4 - two 5 - 2011-02-13 one 6 - two 7 - dtype: int64 - - >>> s.to_xarray() - - array([[ 0., 1.], - [ 2., 3.], - [ 4., 5.], - [ 6., 7.], - [nan, nan]]) - Coordinates: - * first (first) datetime64[ns] 2018-01-01 2015-05-23 2015-06-06 ... - * second (second) object 'one' 'two' + speed (date, animal) int64 350 18 361 15 Notes -----