diff --git a/ci/test_cudf_polars.sh b/ci/test_cudf_polars.sh index 669e049ab26..95fb4b431bf 100755 --- a/ci/test_cudf_polars.sh +++ b/ci/test_cudf_polars.sh @@ -28,10 +28,8 @@ rapids-logger "Install cudf wheel" # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/cudf*.whl)[test] -rapids-logger "Install polars (allow pre-release versions)" -python -m pip install 'polars>=1.0.0a0' - rapids-logger "Install cudf_polars" +python -m pip install 'polars>=1.0' python -m pip install --no-deps python/cudf_polars rapids-logger "Run cudf_polars tests" diff --git a/dependencies.yaml b/dependencies.yaml index 9efbc47896c..e3f8a72e76c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -603,7 +603,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=0.20.30 + - polars>=1.0 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 16cfd9b9749..fe859c8d958 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -978,15 +978,15 @@ def collect_agg(self, *, depth: int) -> AggInfo: class Agg(Expr): __slots__ = ("name", "options", "op", "request", "children") _non_child = ("dtype", "name", "options") - children: tuple[Expr] + children: tuple[Expr, ...] def __init__( - self, dtype: plc.DataType, name: str, options: Any, value: Expr + self, dtype: plc.DataType, name: str, options: Any, *children: Expr ) -> None: super().__init__(dtype) self.name = name self.options = options - self.children = (value,) + self.children = children if name not in Agg._SUPPORTED: raise NotImplementedError( f"Unsupported aggregation {name=}" diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index abe26b14a90..9b3096becd4 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -15,6 +15,7 @@ import dataclasses import itertools +import json import types from functools import cache from typing import TYPE_CHECKING, Any, Callable, ClassVar @@ -180,8 +181,10 @@ def __post_init__(self): class Scan(IR): """Input from files.""" - typ: Any + typ: str """What type of file are we reading? Parquet, CSV, etc...""" + options: tuple[Any, ...] + """Type specific options, as json-encoded strings.""" paths: list[str] """List of paths to read from.""" file_options: Any @@ -211,17 +214,21 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: with_columns = options.with_columns row_index = options.row_index if self.typ == "csv": + opts, cloud_opts = map(json.loads, self.options) df = DataFrame.from_cudf( cudf.concat( [cudf.read_csv(p, usecols=with_columns) for p in self.paths] ) ) elif self.typ == "parquet": + opts, cloud_opts = map(json.loads, self.options) cdf = cudf.read_parquet(self.paths, columns=with_columns) assert isinstance(cdf, cudf.DataFrame) df = DataFrame.from_cudf(cdf) else: - assert_never(self.typ) + raise NotImplementedError( + f"Unhandled scan type: {self.typ}" + ) # pragma: no cover; post init trips first if row_index is not None: name, offset = row_index dtype = self.schema[name] diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index f4bf07ae1e0..a2fdb3c3d79 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -87,9 +87,11 @@ def _( def _( node: pl_ir.Scan, visitor: NodeTraverser, schema: dict[str, plc.DataType] ) -> ir.IR: + typ, *options = node.scan_type return ir.Scan( schema, - node.scan_type, + typ, + tuple(options), node.paths, node.file_options, translate_named_expr(visitor, n=node.predicate) @@ -445,7 +447,7 @@ def _(node: pl_expr.Agg, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Ex dtype, node.name, node.options, - translate_expr(visitor, n=node.arguments), + *(translate_expr(visitor, n=n) for n in node.arguments), ) diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index effa4861e0c..bf4673fcc50 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "cudf==24.8.*,>=0.0.0a0", - "polars>=0.20.30", + "polars>=1.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers",