From 27f8464ed50520464970d8a9914813d81ce48ccf Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Wed, 8 May 2024 21:24:01 -0400 Subject: [PATCH] Update for consistency after #46129 --- python/pyspark/sql/classic/dataframe.py | 3 +++ python/pyspark/sql/dataframe.py | 26 +++++++++++++++++++++++++ python/pyspark/sql/pandas/conversion.py | 20 ------------------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/python/pyspark/sql/classic/dataframe.py b/python/pyspark/sql/classic/dataframe.py index db9f22517ddad..aa2bf8d02b8c2 100644 --- a/python/pyspark/sql/classic/dataframe.py +++ b/python/pyspark/sql/classic/dataframe.py @@ -1825,6 +1825,9 @@ def mapInArrow( ) -> ParentDataFrame: return PandasMapOpsMixin.mapInArrow(self, func, schema, barrier, profile) + def toArrowTable(self) -> "pa.Table": + return PandasConversionMixin.toArrowTable(self) + def toPandas(self) -> "PandasDataFrameLike": return PandasConversionMixin.toPandas(self) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index e3d52c45d0c1d..4219d956b362a 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1200,6 +1200,7 @@ def collect(self) -> List[Row]: DataFrame.take : Returns the first `n` rows. DataFrame.head : Returns the first `n` rows. DataFrame.toPandas : Returns the data as a pandas DataFrame. + DataFrame.toArrowTable : Returns the data as a PyArrow Table. Notes ----- @@ -6213,6 +6214,31 @@ def mapInArrow( """ ... + def toArrowTable(self) -> "pa.Table": + """ + Returns the contents of this :class:`DataFrame` as PyArrow ``pyarrow.Table``. + + This is only available if PyArrow is installed and available. + + .. versionadded:: 4.0.0 + + Notes + ----- + This method should only be used if the resulting PyArrow ``pyarrow.Table`` is + expected to be small, as all the data is loaded into the driver's memory. + + Examples + -------- + >>> df.toArrowTable() # doctest: +SKIP + pyarrow.Table + age: int64 + name: string + ---- + age: [[2,5]] + name: [["Alice","Bob"]] + """ + ... + def toPandas(self) -> "PandasDataFrameLike": """ Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index 4a6a753fec8ce..07272daf00fd0 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -226,26 +226,6 @@ def toPandas(self) -> "PandasDataFrameLike": return pdf def toArrowTable(self) -> "pa.Table": - """ - Returns the contents of this :class:`DataFrame` as PyArrow ``pyarrow.Table``. - - This is only available if PyArrow is installed and available. - - Notes - ----- - This method should only be used if the resulting PyArrow ``pyarrow.Table`` is - expected to be small, as all the data is loaded into the driver's memory. - - Examples - -------- - >>> df.toArrowTable() # doctest: +SKIP - pyarrow.Table - age: int64 - name: string - ---- - age: [[2,5]] - name: [["Alice","Bob"]] - """ from pyspark.sql.dataframe import DataFrame assert isinstance(self, DataFrame)