From c0bc0a09e9acfe7e0e38a4e67f81d5afb992745c Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 3 Mar 2023 13:18:25 +0100 Subject: [PATCH 1/4] GH-34428: [Python][Docs] Add docstring for `make_fragment` --- python/pyarrow/_dataset.pyx | 7 ++++++- python/pyarrow/_dataset_parquet.pyx | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 25e793572172a..f2c1149d920fe 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -891,8 +891,13 @@ cdef class FileFormat(_Weakrefable): filesystem : Filesystem, optional If `filesystem` is given, `file` must be a string and specifies the path of the file to read from the filesystem. - partition_expression : Expression + partition_expression : Expression, optional The filter expression. + + Returns + ------- + fragment : Fragment + The file fragment """ if partition_expression is None: partition_expression = _true diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 01a3b30da5ca7..c4992c45eed5f 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -209,6 +209,26 @@ cdef class ParquetFileFormat(FileFormat): def make_fragment(self, file, filesystem=None, Expression partition_expression=None, row_groups=None): + """ + Make a FileFragment from a given file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to make a fragment from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + partition_expression : Expression, optional + The filter expression. 
+ row_groups : Iterable, optional + The row groups to include + + Returns + ------- + fragment : Fragment + The file fragment + """ cdef: vector[int] c_row_groups From 841ce11bf1418a6b8a9169fc20f584a68d8fa107 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 6 Mar 2023 21:47:59 +0100 Subject: [PATCH 2/4] Better wording Co-authored-by: Weston Pace --- python/pyarrow/_dataset_parquet.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index c4992c45eed5f..4cbd657e185ce 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -220,7 +220,8 @@ cdef class ParquetFileFormat(FileFormat): If `filesystem` is given, `file` must be a string and specifies the path of the file to read from the filesystem. partition_expression : Expression, optional - The filter expression. + An expression that is guaranteed true for all rows in the fragment. Allows + fragment to be potentially skipped while scanning with a filter. row_groups : Iterable, optional The row groups to include From 7bf9a0d6044102c58cfe0e27e74d58ccae3a1734 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 6 Mar 2023 22:38:44 +0100 Subject: [PATCH 3/4] Update python/pyarrow/_dataset_parquet.pyx --- python/pyarrow/_dataset_parquet.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 4cbd657e185ce..d3ceca85e8c68 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -223,7 +223,7 @@ cdef class ParquetFileFormat(FileFormat): An expression that is guaranteed true for all rows in the fragment. Allows fragment to be potentially skipped while scanning with a filter. 
row_groups : Iterable, optional - The row groups to include + The indices of the row groups to include Returns ------- From 9ea31d3f0a97f33f196ab492c56ab697f0e4ba5d Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 7 Mar 2023 07:38:46 +0100 Subject: [PATCH 4/4] Update python/pyarrow/_dataset.pyx Co-authored-by: Weston Pace --- python/pyarrow/_dataset.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index f2c1149d920fe..ec449c16b503a 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -892,7 +892,8 @@ cdef class FileFormat(_Weakrefable): If `filesystem` is given, `file` must be a string and specifies the path of the file to read from the filesystem. partition_expression : Expression, optional - The filter expression. + An expression that is guaranteed true for all rows in the fragment. Allows + fragment to be potentially skipped while scanning with a filter. Returns -------