Reject pandas index names that collide with column names in pandas_to_pydf.

When the pandas index is going to be converted into columns (include_index is
set and the index is not the default one), raise ValueError if any index name
is also a column name. Adds tests for single and multi-level indexes with
include_index both True and False.

diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py
index c20a60c3162e..1986525eb290 100644
--- a/py-polars/polars/_utils/construction/dataframe.py
+++ b/py-polars/polars/_utils/construction/dataframe.py
@@ -1020,6 +1020,11 @@ def pandas_to_pydf(
 ) -> PyDataFrame:
     """Construct a PyDataFrame from a pandas DataFrame."""
     convert_index = include_index and not _pandas_has_default_index(data)
+    if convert_index and set(data.index.names).intersection(data.columns):
+        msg = (
+            "cannot create DataFrame with some index name duplicating some column name"
+        )
+        raise ValueError(msg)
     if not convert_index and all(
         is_simple_numpy_backed_pandas_series(data[col]) for col in data.columns
     ):
diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py
index b357c12ff1f4..0d28dcb587e1 100644
--- a/py-polars/tests/unit/interop/test_interop.py
+++ b/py-polars/tests/unit/interop/test_interop.py
@@ -179,6 +179,74 @@ def test_from_pandas_duplicated_columns() -> None:
         pl.from_pandas(df)
 
 
+def test_from_pandas_exclude_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
+    df = pl.from_pandas(data, include_index=True)
+    assert df.columns == ["c", "a", "b"]
+    assert df.rows() == [(5, 1, 3), (6, 2, 4)]
+
+
+def test_from_pandas_exclude_dup_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_dup_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
+
+    with pytest.raises(ValueError):
+        pl.from_pandas(data, include_index=True)
+
+
+def test_from_pandas_exclude_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["c", "d"]),
+    )
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["c", "d"]),
+    )
+    df = pl.from_pandas(data, include_index=True)
+    assert df.columns == ["c", "d", "a", "b"]
+    assert df.rows() == [(5, 7, 1, 3), (6, 8, 2, 4)]
+
+
+def test_from_pandas_exclude_dup_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["b", "c"]),
+    )
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_dup_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["b", "c"]),
+    )
+
+    with pytest.raises(ValueError):
+        pl.from_pandas(data, include_index=True)
+
+
 def test_arrow_list_roundtrip() -> None:
     # https://github.com/pola-rs/polars/issues/1064
     tbl = pa.table({"a": [1], "b": [[1, 2]]})