From 1e8ac354f0de1d685ce39236c914218a6e8f4fe1 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 12:12:25 +0200 Subject: [PATCH 01/16] Move hypothesis deadline configuration to conftest.py --- properties/conftest.py | 5 +++++ properties/test_encode_decode.py | 7 +------ 2 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 properties/conftest.py diff --git a/properties/conftest.py b/properties/conftest.py new file mode 100644 index 00000000000..2bdae47bf43 --- /dev/null +++ b/properties/conftest.py @@ -0,0 +1,5 @@ +from hypothesis import settings + +# Run for a while - arrays are a bigger search space than usual +settings.register_profile("ci", deadline=None) +settings.load_profile("ci") diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py index b8f52e3de7a..adeeda4f28e 100644 --- a/properties/test_encode_decode.py +++ b/properties/test_encode_decode.py @@ -6,15 +6,10 @@ """ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given, settings +from hypothesis import given import xarray as xr -# Run for a while - arrays are a bigger search space than usual -settings.register_profile("ci", deadline=None) -settings.load_profile("ci") - - an_array = npst.arrays( dtype=st.one_of( npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() From 9f14426bef13802c86d98fca716f475921fc075e Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 12:13:04 +0200 Subject: [PATCH 02/16] Add simple roundtrip test for xarray-pandas-xarray --- properties/test_pandas_roundtrip.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 properties/test_pandas_roundtrip.py diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py new file mode 100644 index 00000000000..e7c360372ea --- /dev/null +++ b/properties/test_pandas_roundtrip.py @@ -0,0 +1,29 @@ +""" +Property-based tests for roundtripping between xarray and pandas objects. +""" +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +from hypothesis import given + +import numpy as np +import xarray as xr + +an_array = npst.arrays( + dtype=st.one_of( + npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() + ), + shape=npst.array_shapes(max_dims=2), # can only convert 1D/2D to pandas +) + + +@given(st.data(), an_array) +def test_roundtrip_dataarray(data, arr): + names = data.draw( + st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map( + tuple + ) + ) + coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape)} + original = xr.DataArray(arr, dims=names, coords=coords) + roundtripped = xr.DataArray(original.to_pandas()) + xr.testing.assert_identical(original, roundtripped) From 18790cc9e2ca4a0d4819e337a41dd16ffc9cb88e Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 13:50:17 +0200 Subject: [PATCH 03/16] Test roundtrip pd.Series->DataArray->Series --- properties/test_pandas_roundtrip.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index e7c360372ea..c29665ef958 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -2,16 +2,22 @@ Property-based tests for roundtripping between xarray and pandas objects. """ import hypothesis.extra.numpy as npst +import hypothesis.extra.pandas as pdst import hypothesis.strategies as st from hypothesis import given import numpy as np +import pandas as pd import xarray as xr +numeric_dtypes = st.one_of( + npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() +) + +numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt)) + an_array = npst.arrays( - dtype=st.one_of( - npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() - ), + dtype=numeric_dtypes, shape=npst.array_shapes(max_dims=2), # can only convert 1D/2D to pandas ) @@ -27,3 +33,11 @@ def test_roundtrip_dataarray(data, arr): original = xr.DataArray(arr, dims=names, coords=coords) roundtripped = xr.DataArray(original.to_pandas()) xr.testing.assert_identical(original, roundtripped) + +@given(numeric_series, st.text()) +def test_roundtrip_pandas_series(ser, name): + # Need to name the index, otherwise Xarray calls it 'dim_0'. + ser.index.name = name + arr = xr.DataArray(ser) + roundtripped = arr.to_pandas() + pd.testing.assert_series_equal(ser, roundtripped) From 2449ac2e6be9a33f8d986a260dc522395e2060c5 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 14:03:28 +0200 Subject: [PATCH 04/16] Test roundtrip DataFrame->DataArray->DataFrame --- properties/test_pandas_roundtrip.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index c29665ef958..21856f8584a 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -34,10 +34,26 @@ def test_roundtrip_dataarray(data, arr): roundtripped = xr.DataArray(original.to_pandas()) xr.testing.assert_identical(original, roundtripped) + @given(numeric_series, st.text()) -def test_roundtrip_pandas_series(ser, name): +def test_roundtrip_pandas_series(ser, ix_name): # Need to name the index, otherwise Xarray calls it 'dim_0'. - ser.index.name = name + ser.index.name = ix_name arr = xr.DataArray(ser) roundtripped = arr.to_pandas() pd.testing.assert_series_equal(ser, roundtripped) + + +numeric_homogeneous_dataframe = numeric_dtypes.flatmap( + lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt)) +) + + +@given(numeric_homogeneous_dataframe) +def test_roundtrip_pandas_dataframe(df): + # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'. + df.index.name = "rows" + df.columns.name = "cols" + arr = xr.DataArray(df) + roundtripped = arr.to_pandas() + pd.testing.assert_frame_equal(df, roundtripped) From 54900f0736b58946c486ef252c7d7ab437212b7a Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 14:47:06 +0200 Subject: [PATCH 05/16] Test roundtrip Dataset->Dataframe->Dataset --- properties/test_pandas_roundtrip.py | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 21856f8584a..71b9fa8f226 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -22,6 +22,29 @@ ) +@st.composite +def datasets_1d_vars(draw): + """Generate datasets with only 1D variables + + Suitable for converting to pandas dataframes. + """ + n_vars = draw(st.integers(min_value=1, max_value=3)) + n_entries = draw(st.integers(min_value=1, max_value=100)) + dims = ("rows",) + vars = {} + for _ in range(n_vars): + name = draw(st.text(min_size=1)) + dt = draw(numeric_dtypes) + arr = draw(npst.arrays(dtype=dt, shape=(n_entries,))) + vars[name] = xr.Variable(dims, arr) + + coords = { + dims[0]: draw(pdst.indexes(dtype="u8", min_size=n_entries, max_size=n_entries)) + } + + return xr.Dataset(vars, coords=coords) + + @given(st.data(), an_array) def test_roundtrip_dataarray(data, arr): names = data.draw( @@ -35,6 +58,14 @@ def test_roundtrip_dataarray(data, arr): xr.testing.assert_identical(original, roundtripped) +@given(datasets_1d_vars()) +def test_roundtrip_dataset(dataset): + df = dataset.to_dataframe() + assert isinstance(df, pd.DataFrame) + roundtripped = xr.Dataset(df) + xr.testing.assert_identical(dataset, roundtripped) + + @given(numeric_series, st.text()) def test_roundtrip_pandas_series(ser, ix_name): # Need to name the index, otherwise Xarray calls it 'dim_0'. @@ -44,6 +75,7 @@ def test_roundtrip_pandas_series(ser, ix_name): pd.testing.assert_series_equal(ser, roundtripped) +# Dataframes with columns of all the same dtype - for roundtrip to DataArray numeric_homogeneous_dataframe = numeric_dtypes.flatmap( lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt)) ) From 02fd31118c1004317ecc52763253c2ea3a9a2ada Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 14:48:57 +0200 Subject: [PATCH 06/16] Relax to allow 0 entries in each dataset var --- properties/test_pandas_roundtrip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 71b9fa8f226..c7f80bce23c 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -29,7 +29,7 @@ def datasets_1d_vars(draw): Suitable for converting to pandas dataframes. """ n_vars = draw(st.integers(min_value=1, max_value=3)) - n_entries = draw(st.integers(min_value=1, max_value=100)) + n_entries = draw(st.integers(min_value=0, max_value=100)) dims = ("rows",) vars = {} for _ in range(n_vars): From e8fb3dad62bb15525f30411cac8abfbfb2239778 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 14:56:33 +0200 Subject: [PATCH 07/16] Relax to allow empty string names --- properties/test_pandas_roundtrip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index c7f80bce23c..663a22c7547 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -33,7 +33,7 @@ def datasets_1d_vars(draw): dims = ("rows",) vars = {} for _ in range(n_vars): - name = draw(st.text(min_size=1)) + name = draw(st.text(min_size=0)) dt = draw(numeric_dtypes) arr = draw(npst.arrays(dtype=dt, shape=(n_entries,))) vars[name] = xr.Variable(dims, arr) From 67c70346d10fbb9a944f7af1838ee4c9f8c46bf7 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 11 Oct 2019 10:05:29 +0100 Subject: [PATCH 08/16] Add print_blob to config --- properties/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/properties/conftest.py b/properties/conftest.py index 2bdae47bf43..93c35e07b70 100644 --- a/properties/conftest.py +++ b/properties/conftest.py @@ -1,5 +1,5 @@ from hypothesis import settings # Run for a while - arrays are a bigger search space than usual -settings.register_profile("ci", deadline=None) +settings.register_profile("ci", deadline=None, print_blob=True) settings.load_profile("ci") From 4ba4f7bc04a4423b96b22dda64e7ad1e67e16fc8 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 11 Oct 2019 10:18:27 +0100 Subject: [PATCH 09/16] Extra half-roundtrip from pandas series to xarray --- properties/test_pandas_roundtrip.py | 1 + 1 file changed, 1 insertion(+) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 663a22c7547..8324d999ece 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -73,6 +73,7 @@ def test_roundtrip_pandas_series(ser, ix_name): arr = xr.DataArray(ser) roundtripped = arr.to_pandas() pd.testing.assert_series_equal(ser, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) # Dataframes with columns of all the same dtype - for roundtrip to DataArray From fb222c572776cebde42d8bf66bca80aa361470f5 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 11 Oct 2019 10:19:16 +0100 Subject: [PATCH 10/16] Extra half roundtrip from pandas dataframe to Xarray --- properties/test_pandas_roundtrip.py | 1 + 1 file changed, 1 insertion(+) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 8324d999ece..6635a641894 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -90,3 +90,4 @@ def test_roundtrip_pandas_dataframe(df): arr = xr.DataArray(df) roundtripped = arr.to_pandas() pd.testing.assert_frame_equal(df, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) From 7b39a6f1cb710582a99ce9824afea1fc17842915 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sat, 12 Oct 2019 10:35:27 +0100 Subject: [PATCH 11/16] Redesign strategy for generating datasets with 1D variables Following suggestions from @Zac-HD --- properties/test_pandas_roundtrip.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 6635a641894..b28636572f8 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -1,6 +1,7 @@ """ Property-based tests for roundtripping between xarray and pandas objects. """ +from functools import partial import hypothesis.extra.numpy as npst import hypothesis.extra.pandas as pdst import hypothesis.strategies as st @@ -28,21 +29,18 @@ def datasets_1d_vars(draw): Suitable for converting to pandas dataframes. """ - n_vars = draw(st.integers(min_value=1, max_value=3)) - n_entries = draw(st.integers(min_value=0, max_value=100)) - dims = ("rows",) - vars = {} - for _ in range(n_vars): - name = draw(st.text(min_size=0)) - dt = draw(numeric_dtypes) - arr = draw(npst.arrays(dtype=dt, shape=(n_entries,))) - vars[name] = xr.Variable(dims, arr) - - coords = { - dims[0]: draw(pdst.indexes(dtype="u8", min_size=n_entries, max_size=n_entries)) - } - - return xr.Dataset(vars, coords=coords) + # Generate an index for the dataset + idx = draw(pdst.indexes(dtype="u8", min_size=0, max_size=100)) + + # Generate 1-3 variables, 1D with the same length as the index + vars_strategy = st.dictionaries( + keys=st.text(), + values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)) + .map(partial(xr.Variable, ("rows",))), + min_size=1, + max_size=3, + ) + return xr.Dataset(draw(vars_strategy), coords={"rows": idx}) @given(st.data(), an_array) From a328739f6da294130756a6355d009c4a41c655f3 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sat, 12 Oct 2019 10:37:53 +0100 Subject: [PATCH 12/16] Make pep8 happy --- properties/test_pandas_roundtrip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index b28636572f8..5b8f56708aa 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -36,7 +36,7 @@ def datasets_1d_vars(draw): vars_strategy = st.dictionaries( keys=st.text(), values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)) - .map(partial(xr.Variable, ("rows",))), + .map(partial(xr.Variable, ("rows",))), min_size=1, max_size=3, ) From ecd016a3d2e39d00bc73355af1ba401ba3617430 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Tue, 29 Oct 2019 14:39:16 +0000 Subject: [PATCH 13/16] Autoformat test file --- properties/test_pandas_roundtrip.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 5b8f56708aa..537f8202f0a 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -35,8 +35,9 @@ def datasets_1d_vars(draw): # Generate 1-3 variables, 1D with the same length as the index vars_strategy = st.dictionaries( keys=st.text(), - values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)) - .map(partial(xr.Variable, ("rows",))), + values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)).map( + partial(xr.Variable, ("rows",)) + ), min_size=1, max_size=3, ) From 351b40bf7b0d46de7d0cd909b8fdfba5a7e8d69b Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Tue, 29 Oct 2019 15:25:59 +0000 Subject: [PATCH 14/16] Skip hypothesis tests if hypothesis not available --- properties/test_pandas_roundtrip.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 537f8202f0a..224fc6c550e 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -1,6 +1,10 @@ """ Property-based tests for roundtripping between xarray and pandas objects. """ +import pytest + +pytest.importorskip("hypothesis") + from functools import partial import hypothesis.extra.numpy as npst import hypothesis.extra.pandas as pdst From 044c67d2ec2b63c67a2554d8555bb0bd8c342614 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Tue, 29 Oct 2019 15:26:27 +0000 Subject: [PATCH 15/16] Don't require hypothesis for conftest file --- properties/conftest.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/properties/conftest.py b/properties/conftest.py index 93c35e07b70..0a66d92ebc6 100644 --- a/properties/conftest.py +++ b/properties/conftest.py @@ -1,5 +1,8 @@ -from hypothesis import settings - -# Run for a while - arrays are a bigger search space than usual -settings.register_profile("ci", deadline=None, print_blob=True) -settings.load_profile("ci") +try: + from hypothesis import settings +except ImportError: + pass +else: + # Run for a while - arrays are a bigger search space than usual + settings.register_profile("ci", deadline=None, print_blob=True) + settings.load_profile("ci") From 5b0ae82951b099e1b48a6f983dc85225b163d4cc Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Wed, 30 Oct 2019 10:01:02 +0000 Subject: [PATCH 16/16] Mark failing test as xfail --- properties/test_pandas_roundtrip.py | 1 + 1 file changed, 1 insertion(+) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 224fc6c550e..a8005d319d6 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -85,6 +85,7 @@ def test_roundtrip_pandas_series(ser, ix_name): ) +@pytest.mark.xfail @given(numeric_homogeneous_dataframe) def test_roundtrip_pandas_dataframe(df): # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'.