Skip to content

Commit

Permalink
Merge branch 'main' of github.com:delta-io/delta-sharing into pranavs…
Browse files Browse the repository at this point in the history
…uku-test-release-github
  • Loading branch information
pranavsuku-db committed Jul 29, 2024
2 parents 42585e7 + a6b1767 commit 5c1f263
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
8 changes: 8 additions & 0 deletions python/delta_sharing/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ def __to_pandas_kernel(self):
dump(deltaMetadata, json_file)
json_file.write("\n")

num_files = len(lines)

# Write the add file actions to the log file
for line in lines:
line_json = loads(line)
Expand All @@ -151,6 +153,12 @@ def __to_pandas_kernel(self):
table = delta_kernel_python.Table(table_path)
snapshot = table.snapshot(interface)
scan = delta_kernel_python.ScanBuilder(snapshot).build()

# The table is empty so use the schema to return an empty table with correct col names
if (num_files == 0):
schema = scan.execute(interface).schema
return pd.DataFrame(columns=schema.names)

table = pa.Table.from_batches(scan.execute(interface))
result = table.to_pandas()

Expand Down
24 changes: 24 additions & 0 deletions python/delta_sharing/tests/test_delta_sharing.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,30 @@ def test_load_as_pandas_success_dv_and_cm(
pd.testing.assert_frame_equal(pdf, expected)


@pytest.mark.skipif(not ENABLE_INTEGRATION, reason=SKIP_MESSAGE)
@pytest.mark.parametrize(
    "fragments,limit,version,expected",
    [
        pytest.param(
            "share8.default.dv_and_cm_table",
            0,
            None,
            pd.DataFrame(columns=["id", "rand", "partition_col"]),
            id="test empty table share",
        )
    ],
)
def test_load_as_pandas_success_empty_dv_and_cm(
    profile_path: str,
    fragments: str,
    limit: Optional[int],
    version: Optional[int],
    expected: pd.DataFrame
):
    """Check that loading a share with a limit of 0 yields an empty frame.

    The parametrized case requests ``share8.default.dv_and_cm_table`` with
    ``limit=0`` and no explicit version, and expects a DataFrame that has
    no rows but still carries the column names ``id``, ``rand`` and
    ``partition_col``.
    """
    # The table URL is the profile path and the share coordinates joined by '#'.
    table_url = f"{profile_path}#{fragments}"
    actual = load_as_pandas(table_url, limit, version, None)
    pd.testing.assert_frame_equal(actual, expected)


# We will test predicates with the table share8.default.cdf_table_with_partition
# This table is partitioned by birthday column of type date.
# There are two partitions: 2020-02-02, and 2020-01-01.
Expand Down

0 comments on commit 5c1f263

Please sign in to comment.