Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added tests for other parquet compressions (#674)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jorgecarleitao authored Dec 12, 2021
1 parent 5b2fb51 commit 915cb6c
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 77 deletions.
24 changes: 10 additions & 14 deletions parquet_integration/write_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def write_pyarrow(
page_version: int,
use_dictionary: bool,
multiple_pages: bool,
compression: bool,
compression: str,
):
data, schema, path = case(size)

Expand All @@ -244,12 +244,7 @@ def write_pyarrow(
base_path = f"{base_path}/multi"

if compression:
base_path = f"{base_path}/snappy"

if compression:
compression = "snappy"
else:
compression = None
base_path = f"{base_path}/{compression}"

if multiple_pages:
data_page_size = 2 ** 10 # i.e. a small number to ensure multiple pages
Expand All @@ -273,7 +268,8 @@ def write_pyarrow(
for case in [case_basic_nullable, case_basic_required, case_nested, case_struct]:
for version in [1, 2]:
for use_dict in [True, False]:
write_pyarrow(case, 1, version, use_dict, False, False)
for compression in ["lz4", None, "snappy"]:
write_pyarrow(case, 1, version, use_dict, False, compression)


def case_benches(size):
Expand All @@ -295,14 +291,14 @@ def case_benches_required(size):
# for read benchmarks
for i in range(10, 22, 2):
# two pages (dict)
write_pyarrow(case_benches, 2 ** i, 1, True, False, False)
write_pyarrow(case_benches, 2 ** i, 1, True, False, None)
# single page
write_pyarrow(case_benches, 2 ** i, 1, False, False, False)
write_pyarrow(case_benches, 2 ** i, 1, False, False, None)
# single page required
write_pyarrow(case_benches_required, 2 ** i, 1, False, False, False)
write_pyarrow(case_benches_required, 2 ** i, 1, False, False, None)
# multiple pages
write_pyarrow(case_benches, 2 ** i, 1, False, True, False)
write_pyarrow(case_benches, 2 ** i, 1, False, True, None)
# multiple compressed pages
write_pyarrow(case_benches, 2 ** i, 1, False, True, True)
write_pyarrow(case_benches, 2 ** i, 1, False, True, "snappy")
# single compressed page
write_pyarrow(case_benches, 2 ** i, 1, False, False, True)
write_pyarrow(case_benches, 2 ** i, 1, False, False, "snappy")
Loading

0 comments on commit 915cb6c

Please sign in to comment.