Skip to content

Commit

Permalink
Simple deltalake benchmark.
Browse files Browse the repository at this point in the history
  • Loading branch information
j-bennet committed Jul 21, 2023
1 parent 1b31742 commit e689969
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 24 deletions.
47 changes: 24 additions & 23 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,29 +32,30 @@ jobs:
matrix:
os: [ubuntu-latest]
python-version: ["3.9"]
pytest_args: [tests]
include:
# Run stability tests on the lowest and highest versions of Python only
# These are temporarily redundant with the current global python-version
# - pytest_args: tests/stability
# python-version: "3.9"
# os: ubuntu-latest
# - pytest_args: tests/stability
# python-version: "3.9"
# os: ubuntu-latest
- pytest_args: tests/stability
python-version: "3.11"
os: ubuntu-latest
- pytest_args: tests/stability
python-version: "3.11"
os: ubuntu-latest
# Run stability tests on Windows and macOS (Python 3.9 only)
- pytest_args: tests/stability
python-version: "3.9"
os: windows-latest
- pytest_args: tests/stability
python-version: "3.9"
os: macos-latest
# pytest_args: [tests]
pytest_args: [tests/benchmarks/test_deltalake.py]
# include:
# # Run stability tests on the lowest and highest versions of Python only
# # These are temporarily redundant with the current global python-version
# # - pytest_args: tests/stability
# # python-version: "3.9"
# # os: ubuntu-latest
# # - pytest_args: tests/stability
# # python-version: "3.9"
# # os: ubuntu-latest
# - pytest_args: tests/stability
# python-version: "3.11"
# os: ubuntu-latest
# - pytest_args: tests/stability
# python-version: "3.11"
# os: ubuntu-latest
# # Run stability tests on Windows and macOS (Python 3.9 only)
# - pytest_args: tests/stability
# python-version: "3.9"
# os: windows-latest
# - pytest_args: tests/stability
# python-version: "3.9"
# os: macos-latest

steps:
- name: Checkout
Expand Down
4 changes: 3 additions & 1 deletion ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ dependencies:
- gilknocker ==0.4.1
- openssl >1.1.0g
- pyopenssl ==22.1.0 # Pinned by snowflake-connector-python
- cryptography ==38.0.4 # Pinned by snowflake-connector-python
- cryptography ==38.0.4 # Pinned by snowflake-connector-python
- pip:
- git+https://github.com/dask-contrib/dask-deltatable.git # TODO: link to release version
34 changes: 34 additions & 0 deletions tests/benchmarks/test_deltalake.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import dask.dataframe as dd
import dask_deltatable as ddt
import pytest


@pytest.fixture(params=["read_deltalake", "read_parquet"])
def ddf(request, small_client):
    """Yield the benchmark dataset, loaded once per reader under test.

    The fixture is parametrized, so every test that requests it runs twice:
    once with the table opened through ``dask_deltatable.read_deltalake`` and
    once with the same location globbed as plain parquet files through
    ``dask.dataframe.read_parquet`` (pyarrow engine).

    ``small_client`` is requested but not used in the body.
    NOTE(review): presumably it ensures a Dask client/cluster is active for
    the computation; it is defined outside this file -- confirm.
    """
    uri = "s3://coiled-datasets/delta/ds20f_100M/"
    if request.param != "read_deltalake":
        # Plain-parquet reader: glob every parquet file under the table root.
        frame = dd.read_parquet(f"{uri}*.parquet", engine="pyarrow")
    else:
        frame = ddt.read_deltalake(uri)
    yield frame


def test_column_agg(ddf):
    """Compute the sum and mean of the ``float1`` column.

    The result is discarded; the test only exercises the computation.
    """
    column = ddf["float1"]
    stats = column.agg(["sum", "mean"])
    stats.compute()


def test_group_agg(ddf):
    """Group by ``int2``/``int3`` and compute sum and mean of ``int1``.

    Only the three integer columns involved are selected before grouping.
    The result is discarded; the test only exercises the computation.
    """
    subset = ddf[["int1", "int2", "int3"]]
    grouped = subset.groupby(["int2", "int3"], dropna=False, observed=True)
    aggregated = grouped.agg({"int1": ["sum", "mean"]})
    aggregated.compute()


def test_group_median(ddf, shuffle_method):
    """Group by ``int2``/``int3`` and compute median and std of ``int1``.

    ``shuffle_method`` is supplied from outside this file and is forwarded
    unchanged as the ``shuffle`` keyword of the aggregation. The result is
    discarded; the test only exercises the computation.
    """
    subset = ddf[["int1", "int2", "int3"]]
    grouped = subset.groupby(["int2", "int3"], dropna=False, observed=True)
    aggregated = grouped.agg({"int1": ["median", "std"]}, shuffle=shuffle_method)
    aggregated.compute()

0 comments on commit e689969

Please sign in to comment.