Skip to content

Commit

Permalink
Hypothesis tests for roundtrip to & from pandas (#3285)
Browse files Browse the repository at this point in the history
* Move hypothesis deadline configuration to conftest.py

* Add simple roundtrip test for xarray-pandas-xarray

* Test roundtrip pd.Series->DataArray->Series

* Test roundtrip DataFrame->DataArray->DataFrame

* Test roundtrip Dataset->Dataframe->Dataset

* Relax to allow 0 entries in each dataset var

* Relax to allow empty string names

* Add print_blob to config

* Extra half-roundtrip from pandas series to xarray

* Extra half-roundtrip from pandas dataframe to xarray

* Redesign strategy for generating datasets with 1D variables

Following suggestions from @Zac-HD

* Make pep8 happy

* Autoformat test file

* Skip hypothesis tests if hypothesis not available

* Don't require hypothesis for conftest file

* Mark failing test as xfail
  • Loading branch information
takluyver authored and dcherian committed Oct 30, 2019
1 parent 092d300 commit f115ad1
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 6 deletions.
8 changes: 8 additions & 0 deletions properties/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Shared hypothesis configuration for the property-based tests.
# hypothesis is an optional dependency: when it cannot be imported, this file
# does nothing rather than failing at collection time.
try:
    from hypothesis import settings
except ImportError:
    pass
else:
    # Run for a while - arrays are a bigger search space than usual
    # (deadline=None lifts the per-example time limit; print_blob=True asks
    # hypothesis to print a reproduction blob for failures.)
    settings.register_profile("ci", deadline=None, print_blob=True)
    settings.load_profile("ci")
7 changes: 1 addition & 6 deletions properties/test_encode_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,10 @@

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
from hypothesis import given, settings
from hypothesis import given

import xarray as xr

# Run for a while - arrays are a bigger search space than usual
settings.register_profile("ci", deadline=None)
settings.load_profile("ci")


an_array = npst.arrays(
dtype=st.one_of(
npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes()
Expand Down
97 changes: 97 additions & 0 deletions properties/test_pandas_roundtrip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
Property-based tests for roundtripping between xarray and pandas objects.
"""
import pytest

pytest.importorskip("hypothesis")

from functools import partial
import hypothesis.extra.numpy as npst
import hypothesis.extra.pandas as pdst
import hypothesis.strategies as st
from hypothesis import given

import numpy as np
import pandas as pd
import xarray as xr

# Any unsigned-integer, signed-integer or floating-point numpy dtype.
numeric_dtypes = (
    npst.unsigned_integer_dtypes() | npst.integer_dtypes() | npst.floating_dtypes()
)

# pandas Series whose values all share one dtype drawn from ``numeric_dtypes``.
numeric_series = numeric_dtypes.flatmap(lambda dtype: pdst.series(dtype=dtype))

# Numeric numpy arrays with at most two dimensions.
an_array = npst.arrays(
    dtype=numeric_dtypes,
    # can only convert 1D/2D to pandas
    shape=npst.array_shapes(max_dims=2),
)


@st.composite
def datasets_1d_vars(draw):
    """Build an ``xr.Dataset`` whose data variables are all one-dimensional.

    Every variable lies along a single ``"rows"`` dimension, which makes the
    dataset suitable for conversion to a pandas DataFrame.
    """
    # The shared index: unsigned 64-bit values, between 0 and 100 entries.
    row_index = draw(pdst.indexes(dtype="u8", min_size=0, max_size=100))

    # Each variable is a numeric array with exactly one value per index entry,
    # wrapped as a Variable along the "rows" dimension.
    to_row_variable = partial(xr.Variable, ("rows",))
    column_values = npst.arrays(dtype=numeric_dtypes, shape=len(row_index))

    # Between one and three variables, keyed by arbitrary text names.
    data_vars = draw(
        st.dictionaries(
            keys=st.text(),
            values=column_values.map(to_row_variable),
            min_size=1,
            max_size=3,
        )
    )
    return xr.Dataset(data_vars, coords={"rows": row_index})


@given(st.data(), an_array)
def test_roundtrip_dataarray(data, arr):
    """Check DataArray -> pandas -> DataArray yields an identical object."""
    ndim = arr.ndim
    # One distinct (possibly empty) text name per array dimension.
    dim_names_strategy = st.lists(
        st.text(), min_size=ndim, max_size=ndim, unique=True
    ).map(tuple)
    names = data.draw(dim_names_strategy)

    # Give every dimension an explicit 0..n-1 integer coordinate.
    coords = {}
    for dim_name, length in zip(names, arr.shape):
        coords[dim_name] = np.arange(length)

    original = xr.DataArray(arr, dims=names, coords=coords)
    as_pandas = original.to_pandas()
    roundtripped = xr.DataArray(as_pandas)
    xr.testing.assert_identical(original, roundtripped)


@given(datasets_1d_vars())
def test_roundtrip_dataset(dataset):
    """Check Dataset -> DataFrame -> Dataset yields an identical object."""
    frame = dataset.to_dataframe()
    assert isinstance(frame, pd.DataFrame)
    # Rebuilding from the DataFrame must reproduce the original dataset.
    xr.testing.assert_identical(dataset, xr.Dataset(frame))


@given(numeric_series, st.text())
def test_roundtrip_pandas_series(ser, ix_name):
    """Check Series -> DataArray -> Series, plus the extra hop back to xarray."""
    # Need to name the index, otherwise Xarray calls it 'dim_0'.
    ser.index.name = ix_name

    as_xarray = xr.DataArray(ser)
    back_to_pandas = as_xarray.to_pandas()

    # Full roundtrip on the pandas side ...
    pd.testing.assert_series_equal(ser, back_to_pandas)
    # ... and the extra half-roundtrip on the xarray side.
    xr.testing.assert_identical(as_xarray, back_to_pandas.to_xarray())


# DataFrames with three columns ("a", "b", "c") that all share one numeric
# dtype - needed for the roundtrip to DataArray.
numeric_homogeneous_dataframe = numeric_dtypes.flatmap(
    lambda dtype: pdst.data_frames(
        columns=pdst.columns(["a", "b", "c"], dtype=dtype)
    )
)


# NOTE(review): marked as an expected failure; the cause is not recorded here.
@pytest.mark.xfail
@given(numeric_homogeneous_dataframe)
def test_roundtrip_pandas_dataframe(df):
    """Check DataFrame -> DataArray -> DataFrame, plus the hop back to xarray."""
    # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'.
    df.index.name = "rows"
    df.columns.name = "cols"

    as_xarray = xr.DataArray(df)
    back_to_pandas = as_xarray.to_pandas()

    # Full roundtrip on the pandas side ...
    pd.testing.assert_frame_equal(df, back_to_pandas)
    # ... and the extra half-roundtrip on the xarray side.
    xr.testing.assert_identical(as_xarray, back_to_pandas.to_xarray())

0 comments on commit f115ad1

Please sign in to comment.