Skip to content

Commit

Permalink
fix: support root empty field in Parquet file (#1619)
Browse files Browse the repository at this point in the history
* refactor: remove duplicate loc

* fix: specify column_prefix

* test: add test for #1619

* test: update test to show crash happens with extension arrays
  • Loading branch information
agoose77 authored Aug 26, 2022
1 parent bcfb0a9 commit 229c09f
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 8 deletions.
20 changes: 12 additions & 8 deletions src/awkward/_v2/operations/ak_from_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,18 @@ def metadata(
):
list_indicator = "list.element"
break

subform = ak._v2._connect.pyarrow.form_handle_arrow(
parquetfile_for_metadata.schema_arrow, pass_empty_field=True
)
if columns is not None:
subform = subform.select_columns(columns)

form = ak._v2._connect.pyarrow.form_handle_arrow(
parquetfile_for_metadata.schema_arrow, pass_empty_field=True
)
subform = form.select_columns(columns)
# Handle empty field at root
if parquetfile_for_metadata.schema_arrow.names == [""]:
column_prefix = ("",)
else:
subform = ak._v2._connect.pyarrow.form_handle_arrow(
parquetfile_for_metadata.schema_arrow, pass_empty_field=True
)
column_prefix = ()

metadata = parquetfile_for_metadata.metadata
if scan_files and not path_for_schema.endswith("/_metadata"):
Expand Down Expand Up @@ -196,7 +198,9 @@ def metadata(
else:
col_counts = None

parquet_columns = subform.columns(list_indicator=list_indicator)
parquet_columns = subform.columns(
list_indicator=list_indicator, column_prefix=column_prefix
)

return parquet_columns, subform, actual_paths, fs, subrg, col_counts, metadata

Expand Down
31 changes: 31 additions & 0 deletions tests/v2/test_1619-from-parquet-empty-field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import os

import pytest # noqa: F401
import numpy as np # noqa: F401
import awkward as ak # noqa: F401

pytest.importorskip("pyarrow.parquet")


def test_no_extension(tmp_path):
    """Round-trip a list-of-records array through Parquet, selecting one column.

    The array is written with ``extensionarray=False`` and then read back
    with ``columns=["x"]``; only the ``x`` field must be present in the
    result.
    """
    # Two variable-length lists of records, each record having "x" and "y".
    record_lists = [
        [
            {"x": 1, "y": 1.1},
            {"x": 2, "y": 2.2},
            {"x": 3, "y": 3.3},
        ],
        [
            {"x": 1, "y": 1.1},
            {"x": 2, "y": 2.2},
        ],
    ]
    source = ak._v2.Array(record_lists)

    destination = os.path.join(tmp_path, "array-no-ext.parquet")
    ak._v2.to_parquet(source, destination, extensionarray=False)

    # Column selection must keep only the requested field.
    selected = ak._v2.from_parquet(destination, columns=["x"])
    assert selected.fields == ["x"]

0 comments on commit 229c09f

Please sign in to comment.