From 9c3a2fb59d934fbfd3e8f999c1044f8dedb36033 Mon Sep 17 00:00:00 2001 From: wshaoul Date: Fri, 4 Oct 2024 20:42:01 -0400 Subject: [PATCH 1/2] Added a helper function to help the date column dtype be customized --- pandas/io/parsers/arrow_parser_wrapper.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 86bb5f190e403..523597920948e 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -306,4 +306,25 @@ def read(self) -> DataFrame: else: frame = table.to_pandas() + + self._set_date_column_dtype(frame, date_columns=["date_column1", "date_column2"], dtype="timestamp[ns][pyarrow]") + return self._finalize_pandas_output(frame) + + + def _set_date_column_dtype(self, frame: DataFrame, date_columns: list, dtype: str): + """ + Sets the dtype for specified date columns in the DataFrame. + + Parameters + ---------- + frame : DataFrame + The DataFrame to modify. + date_columns : list + List of column names that are date columns. + dtype : str + The dtype to apply to these columns, e.g., 'datetime64[ns]'. + """ + for col in date_columns: + if col in frame.columns: + frame[col] = frame[col].astype(dtype) From 2ea84ee458b9c99e31f082b953ad9b59f420cf0b Mon Sep 17 00:00:00 2001 From: wshaoul Date: Fri, 4 Oct 2024 21:16:30 -0400 Subject: [PATCH 2/2] Moved dtype setting function to finalize output function --- pandas/io/parsers/arrow_parser_wrapper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 523597920948e..658942a41fd6c 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -217,6 +217,9 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame: except TypeError as err: # GH#44901 reraise to keep api consistent raise ValueError(str(err)) from err + + self._set_date_column_dtype(frame, date_columns=["date_column1", "date_column2"], dtype="timestamp[ns][pyarrow]") + return frame def _validate_usecols(self, usecols) -> None: @@ -306,8 +309,6 @@ def read(self) -> DataFrame: else: frame = table.to_pandas() - - self._set_date_column_dtype(frame, date_columns=["date_column1", "date_column2"], dtype="timestamp[ns][pyarrow]") return self._finalize_pandas_output(frame)