Skip to content

Commit

Permalink
fix: ensure df in empty parquet (#16621)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ritchie46 authored May 31, 2024
1 parent 7c7e834 commit 6e88f1d
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
5 changes: 4 additions & 1 deletion crates/polars-io/src/parquet/read/read_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,10 +779,13 @@ impl BatchedParquetReader {
self.chunks_fifo.push_back(df)
}
}
} else {
skipped_all_rgs = !self.has_returned;
};

if self.chunks_fifo.is_empty() {
if skipped_all_rgs {
self.has_returned = true;
Ok(Some(vec![materialize_empty_df(
Some(self.projection.as_ref()),
&self.schema,
Expand All @@ -803,7 +806,7 @@ impl BatchedParquetReader {
}
}

self.has_returned |= true;
self.has_returned = true;
Ok(Some(chunks))
}
}
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/streaming/test_streaming_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,13 @@ def test_parquet_eq_statistics(monkeypatch: Any, capfd: Any, tmp_path: Path) ->
"parquet file can be skipped, the statistics were sufficient"
" to apply the predicate." in captured
)


@pytest.mark.write_disk()
def test_streaming_empty_parquet_16523(tmp_path: Path) -> None:
    """Check that a streaming join against an empty parquet scan yields a frame.

    A parquet file with a single ``Int32`` column ``a`` and no rows is
    written to ``tmp_path``, scanned lazily, and joined on ``a`` with a
    one-row LazyFrame. Collecting with ``streaming=True`` must return a
    DataFrame with zero rows and one column.

    The ``16523`` suffix presumably refers to the upstream issue number
    (inferred from the test name only).
    """
    schema = {"a": pl.Int32}

    # Write a parquet file that carries the schema but no rows.
    parquet_path = tmp_path / "foo.parquet"
    pl.DataFrame({"a": []}, schema=schema).write_parquet(parquet_path)

    empty_scan = pl.scan_parquet(parquet_path)
    single_row = pl.LazyFrame({"a": [1]}, schema=schema)

    joined = empty_scan.join(single_row, on="a").collect(streaming=True)

    # Zero rows, but the column "a" must still be present.
    assert joined.shape == (0, 1)

0 comments on commit 6e88f1d

Please sign in to comment.