dask-contrib · jacobtomlinson · Apr 3, 2024 · Mar 8, 2024 · Mar 8, 2024 · Mar 8, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -32,7 +32,7 @@ repos:
           # Type stubs
           - boto3-stubs
           - dask
-          - deltalake
+          - deltalake>=0.16
           - pandas-stubs
           - pytest
           - types-setuptools

diff --git a/dask_deltatable/core.py b/dask_deltatable/core.py
@@ -2,14 +2,12 @@
 
 import os
 from collections.abc import Sequence
-from functools import partial
 from typing import Any, Callable, cast
 
 import dask
 import dask.dataframe as dd
 import pyarrow as pa
 import pyarrow.parquet as pq
-from dask.base import tokenize
 from dask.dataframe.io.parquet.arrow import ArrowDatasetEngine
 from dask.dataframe.utils import make_meta
 from deltalake import DataCatalog, DeltaTable
@@ -117,11 +115,14 @@ def _read_from_filesystem(
         meta = meta[columns]
 
     return dd.from_map(
-        partial(_read_delta_partition, fs=fs, columns=columns, schema=schema, **kwargs),
+        _read_delta_partition,
         pq_files,
         meta=meta,
         label="read-delta-table",
-        token=tokenize(path, fs_token, **kwargs),
+        fs=fs,
+        columns=columns,
+        schema=schema,
+        **kwargs,
     )
 
 

diff --git a/dask_deltatable/write.py b/dask_deltatable/write.py
@@ -16,7 +16,7 @@
 from dask.highlevelgraph import HighLevelGraph
 from deltalake import DeltaTable
 from deltalake.writer import (
-    MAX_SUPPORTED_WRITER_VERSION,
+    MAX_SUPPORTED_PYARROW_WRITER_VERSION,
     PYARROW_MAJOR_VERSION,
     AddAction,
     DeltaJSONEncoder,
@@ -167,11 +167,11 @@ def to_deltalake(
         else:
             partition_by = table.metadata().partition_columns
 
-        if table.protocol().min_writer_version > MAX_SUPPORTED_WRITER_VERSION:
+        if table.protocol().min_writer_version > MAX_SUPPORTED_PYARROW_WRITER_VERSION:
             raise DeltaProtocolError(
                 "This table's min_writer_version is "
                 f"{table.protocol().min_writer_version}, "
-                f"but this method only supports version {MAX_SUPPORTED_WRITER_VERSION}."
+                f"but this method only supports version {MAX_SUPPORTED_PYARROW_WRITER_VERSION}."
             )
     else:  # creating a new table
         current_version = -1

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
 dask[dataframe]
-deltalake>=0.15
+deltalake>=0.16
 fsspec
 pyarrow
diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py
@@ -51,6 +51,7 @@ def test_reader_all_primitive_types():
     # timestamp differently. This is likely a bug in arrow but the delta result
     # is "more correct".
     expected_ddf["timestamp"] = expected_ddf["timestamp"].astype("datetime64[us]")
+    expected_ddf["timestamp"] = expected_ddf["timestamp"].dt.tz_localize("UTC")
     assert_eq(actual_ddf, expected_ddf)