From 1d072ee41796b8e6a930fd8285b1313e7ca53299 Mon Sep 17 00:00:00 2001
From: Piriyakorn P <piriyakorn.piriyatamwong@inf.ethz.ch>
Date: Thu, 2 May 2024 20:30:50 +0200
Subject: [PATCH 1/5] fix: raise ValueError when converting from pandas if the
 index name duplicates a column name (#15938)

---
 py-polars/polars/_utils/construction/dataframe.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py
index c20a60c3162e..ca2b6a202bed 100644
--- a/py-polars/polars/_utils/construction/dataframe.py
+++ b/py-polars/polars/_utils/construction/dataframe.py
@@ -1020,6 +1020,9 @@ def pandas_to_pydf(
 ) -> PyDataFrame:
     """Construct a PyDataFrame from a pandas DataFrame."""
     convert_index = include_index and not _pandas_has_default_index(data)
+    if convert_index and data.index.name in data.columns:
+        msg = "cannot convert pandas DataFrame when index name duplicates some column name"
+        raise ValueError(msg)
     if not convert_index and all(
         is_simple_numpy_backed_pandas_series(data[col]) for col in data.columns
     ):

From 8eeb0b5a2df028d6ea9bcab86767f6c9355b937c Mon Sep 17 00:00:00 2001
From: Piriyakorn P <piriyakorn.piriyatamwong@inf.ethz.ch>
Date: Thu, 2 May 2024 20:36:07 +0200
Subject: [PATCH 2/5] fix: raise ValueError when converting from pandas if the
 index name duplicates a column name (#15938) - improve readability

---
 py-polars/polars/_utils/construction/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py
index ca2b6a202bed..65957ea8366e 100644
--- a/py-polars/polars/_utils/construction/dataframe.py
+++ b/py-polars/polars/_utils/construction/dataframe.py
@@ -1020,7 +1020,7 @@ def pandas_to_pydf(
 ) -> PyDataFrame:
     """Construct a PyDataFrame from a pandas DataFrame."""
     convert_index = include_index and not _pandas_has_default_index(data)
-    if convert_index and data.index.name in data.columns:
+    if convert_index and (data.index.name in data.columns):
         msg = "cannot convert pandas DataFrame when index name duplicates some column name"
         raise ValueError(msg)
     if not convert_index and all(

From 679940ec799a6df3d2d6980fa39d94699df21d17 Mon Sep 17 00:00:00 2001
From: Piriyakorn P <piriyakorn.piriyatamwong@inf.ethz.ch>
Date: Thu, 2 May 2024 21:07:28 +0200
Subject: [PATCH 3/5] fix: raise ValueError when converting from pandas if the
 index name duplicates a column name (#15938) - add unit tests

---
 .../tests/unit/dataframe/test_from_pandas.py  | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 py-polars/tests/unit/dataframe/test_from_pandas.py

diff --git a/py-polars/tests/unit/dataframe/test_from_pandas.py b/py-polars/tests/unit/dataframe/test_from_pandas.py
new file mode 100644
index 000000000000..cda6958bdd6a
--- /dev/null
+++ b/py-polars/tests/unit/dataframe/test_from_pandas.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import pytest
+
+import pandas as pd
+import polars as pl
+
+
+def test_from_pandas_exclude_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
+    df = pl.from_pandas(data, include_index=True)
+    assert df.columns == ["c", "a", "b"]
+    assert df.rows() == [(5, 1, 3), (6, 2, 4)]
+
+
+def test_from_pandas_exclude_dup_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_dup_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
+
+    with pytest.raises(ValueError):
+        pl.from_pandas(data, include_index=True)

From 019506b073af82d9116b5fc506a014deefd57644 Mon Sep 17 00:00:00 2001
From: Piriyakorn P <piriyakorn.piriyatamwong@inf.ethz.ch>
Date: Thu, 2 May 2024 21:10:04 +0200
Subject: [PATCH 4/5] fix: raise ValueError when converting from pandas if the
 index name duplicates a column name (#15938) - fix import ordering (lint)

---
 py-polars/tests/unit/dataframe/test_from_pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py-polars/tests/unit/dataframe/test_from_pandas.py b/py-polars/tests/unit/dataframe/test_from_pandas.py
index cda6958bdd6a..6047f833bda3 100644
--- a/py-polars/tests/unit/dataframe/test_from_pandas.py
+++ b/py-polars/tests/unit/dataframe/test_from_pandas.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
+import pandas as pd
 import pytest
 
-import pandas as pd
 import polars as pl
 
 

From 2a5dccf232c9e4a3e983a893607dfa0ee99d36c2 Mon Sep 17 00:00:00 2001
From: Piriyakorn P <piriyakorn.piriyatamwong@inf.ethz.ch>
Date: Thu, 2 May 2024 21:37:49 +0200
Subject: [PATCH 5/5] fix: raise ValueError when converting from pandas if the
 index name duplicates a column name (#15938) - handle MultiIndex

---
 .../polars/_utils/construction/dataframe.py   |  6 +-
 .../tests/unit/dataframe/test_from_pandas.py  | 34 ----------
 py-polars/tests/unit/interop/test_interop.py  | 68 +++++++++++++++++++
 3 files changed, 72 insertions(+), 36 deletions(-)
 delete mode 100644 py-polars/tests/unit/dataframe/test_from_pandas.py

diff --git a/py-polars/polars/_utils/construction/dataframe.py b/py-polars/polars/_utils/construction/dataframe.py
index 65957ea8366e..1986525eb290 100644
--- a/py-polars/polars/_utils/construction/dataframe.py
+++ b/py-polars/polars/_utils/construction/dataframe.py
@@ -1020,8 +1020,10 @@ def pandas_to_pydf(
 ) -> PyDataFrame:
     """Construct a PyDataFrame from a pandas DataFrame."""
     convert_index = include_index and not _pandas_has_default_index(data)
-    if convert_index and (data.index.name in data.columns):
-        msg = "cannot convert pandas DataFrame when index name duplicates some column name"
+    if convert_index and set(data.index.names).intersection(data.columns):
+        msg = (
+            "cannot create DataFrame with some index name duplicating some column name"
+        )
         raise ValueError(msg)
     if not convert_index and all(
         is_simple_numpy_backed_pandas_series(data[col]) for col in data.columns
diff --git a/py-polars/tests/unit/dataframe/test_from_pandas.py b/py-polars/tests/unit/dataframe/test_from_pandas.py
deleted file mode 100644
index 6047f833bda3..000000000000
--- a/py-polars/tests/unit/dataframe/test_from_pandas.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from __future__ import annotations
-
-import pandas as pd
-import pytest
-
-import polars as pl
-
-
-def test_from_pandas_exclude_index() -> None:
-    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
-    df = pl.from_pandas(data, include_index=False)
-    assert df.columns == ["a", "b"]
-    assert df.rows() == [(1, 3), (2, 4)]
-
-
-def test_from_pandas_include_index() -> None:
-    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
-    df = pl.from_pandas(data, include_index=True)
-    assert df.columns == ["c", "a", "b"]
-    assert df.rows() == [(5, 1, 3), (6, 2, 4)]
-
-
-def test_from_pandas_exclude_dup_index() -> None:
-    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
-    df = pl.from_pandas(data, include_index=False)
-    assert df.columns == ["a", "b"]
-    assert df.rows() == [(1, 3), (2, 4)]
-
-
-def test_from_pandas_include_dup_index() -> None:
-    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
-
-    with pytest.raises(ValueError):
-        pl.from_pandas(data, include_index=True)
diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py
index b357c12ff1f4..0d28dcb587e1 100644
--- a/py-polars/tests/unit/interop/test_interop.py
+++ b/py-polars/tests/unit/interop/test_interop.py
@@ -179,6 +179,74 @@ def test_from_pandas_duplicated_columns() -> None:
         pl.from_pandas(df)
 
 
+def test_from_pandas_exclude_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="c"))
+    df = pl.from_pandas(data, include_index=True)
+    assert df.columns == ["c", "a", "b"]
+    assert df.rows() == [(5, 1, 3), (6, 2, 4)]
+
+
+def test_from_pandas_exclude_dup_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_dup_index() -> None:
+    data = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=pd.Index([5, 6], name="a"))
+
+    with pytest.raises(ValueError):
+        pl.from_pandas(data, include_index=True)
+
+
+def test_from_pandas_exclude_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["c", "d"]),
+    )
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["c", "d"]),
+    )
+    df = pl.from_pandas(data, include_index=True)
+    assert df.columns == ["c", "d", "a", "b"]
+    assert df.rows() == [(5, 7, 1, 3), (6, 8, 2, 4)]
+
+
+def test_from_pandas_exclude_dup_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["b", "c"]),
+    )
+    df = pl.from_pandas(data, include_index=False)
+    assert df.columns == ["a", "b"]
+    assert df.rows() == [(1, 3), (2, 4)]
+
+
+def test_from_pandas_include_dup_multi_index() -> None:
+    data = pd.DataFrame(
+        {"a": [1, 2], "b": [3, 4]},
+        index=pd.MultiIndex.from_arrays([(5, 6), (7, 8)], names=["b", "c"]),
+    )
+
+    with pytest.raises(ValueError):
+        pl.from_pandas(data, include_index=True)
+
+
 def test_arrow_list_roundtrip() -> None:
     # https://github.com/pola-rs/polars/issues/1064
     tbl = pa.table({"a": [1], "b": [[1, 2]]})