From 3c2d1bf3f1e3079c908dc7e06f565d08d4df71ee Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Thu, 16 Sep 2021 13:04:08 +0200 Subject: [PATCH] support dataframe protocol (tested with Vaex) This allows plotly express to take in any dataframe that supports the dataframe protocol, see: https://data-apis.org/blog/dataframe_protocol_rfc/ https://data-apis.org/dataframe-protocol/latest/index.html Test includes an example with vaex, which should work with https://github.com/vaexio/vaex/pull/1509/ (not yet released) --- packages/python/plotly/plotly/express/_core.py | 12 +++++++++++- .../tests/test_optional/test_px/test_px_input.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index c9f03580faf..b28a0134a59 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1307,7 +1307,17 @@ def build_dataframe(args, constructor): # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.) df_provided = args["data_frame"] is not None if df_provided and not isinstance(args["data_frame"], pd.DataFrame): - args["data_frame"] = pd.DataFrame(args["data_frame"]) + if hasattr(args["data_frame"], "__dataframe__"): + # Pandas does not implement a `from_dataframe` yet + # $ wget https://raw.githubusercontent.com/data-apis/dataframe-api/main/protocol/pandas_implementation.py + # $ export PYTHONPATH=`pwd` + import pandas_implementation + + args["data_frame"] = pandas_implementation.from_dataframe( + args["data_frame"] + ) + else: + args["data_frame"] = pd.DataFrame(args["data_frame"]) df_input = args["data_frame"] # now we handle special cases like wide-mode or x-xor-y specification diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py index 477e7dbcb04..1dbfc95dff9 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py @@ -233,6 +233,19 @@ def test_build_df_with_index(): assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"]) +def test_build_df_protocol(): + import vaex + + # take out the 'species' columns since the vaex implementation does not cover strings yet + iris_pandas = px.data.iris()[["petal_width", "sepal_length"]] + iris_vaex = vaex.from_pandas(iris_pandas) + args = dict(data_frame=iris_vaex, x="petal_width", y="sepal_length") + out = build_dataframe(args, go.Scatter) + assert_frame_equal( + iris_pandas.reset_index()[out["data_frame"].columns], out["data_frame"] + ) + + def test_timezones(): df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]}) df["date"] = pd.to_datetime(df["date"])