From ca0e5f593cb8a7f719b7616cd741cecda1898e78 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 7 May 2024 07:31:41 -0400 Subject: [PATCH] Make non-experimental and rename toArrowTable --- python/pyspark/sql/connect/dataframe.py | 2 +- python/pyspark/sql/pandas/conversion.py | 6 ++---- python/pyspark/sql/tests/test_arrow.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 1d867cfe638bf..ddaadc1a289dc 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1775,7 +1775,7 @@ def _to_table(self) -> Tuple["pa.Table", Optional[StructType]]: assert table is not None return (table, schema) - def _toArrow(self) -> "pa.Table": + def toArrowTable(self) -> "pa.Table": table, _ = self._to_table() return table diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index 825bd3292633f..4a6a753fec8ce 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -225,7 +225,7 @@ def toPandas(self) -> "PandasDataFrameLike": else: return pdf - def _toArrow(self) -> "pa.Table": + def toArrowTable(self) -> "pa.Table": """ Returns the contents of this :class:`DataFrame` as PyArrow ``pyarrow.Table``. @@ -238,15 +238,13 @@ def _toArrow(self) -> "pa.Table": Examples -------- - >>> df.toArrow() # doctest: +SKIP + >>> df.toArrowTable() # doctest: +SKIP pyarrow.Table age: int64 name: string ---- age: [[2,5]] name: [["Alice","Bob"]] - - .. note:: Experimental. """ from pyspark.sql.dataframe import DataFrame diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 4ec9527f640d0..8fbbe0636ac1b 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -371,7 +371,7 @@ def test_pandas_round_trip(self): def test_arrow_round_trip(self): t_in = self.create_arrow_table() df = self.spark.createDataFrame(self.data, schema=self.schema) - t_out = df._toArrow() + t_out = df.toArrowTable() self.assertTrue(t_out.equals(t_in)) def test_pandas_self_destruct(self):