Skip to content

Commit

Permalink
Update cudf-polars for v1 release of polars (#16149)
Browse files (browse the repository at this point in the history)
The polars v1 release makes minor changes to the IR; this commit adapts cudf-polars to them and requires `polars>=1.0` in the dependencies.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)
  - Thomas Li (https://github.com/lithomas1)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #16149
  • Loading branch information
wence- authored Jul 2, 2024
1 parent 760c15c commit 08552f8
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 12 deletions.
4 changes: 1 addition & 3 deletions ci/test_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ rapids-logger "Install cudf wheel"
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]

rapids-logger "Install polars (allow pre-release versions)"
python -m pip install 'polars>=1.0.0a0'

rapids-logger "Install cudf_polars"
python -m pip install 'polars>=1.0'
python -m pip install --no-deps python/cudf_polars

rapids-logger "Run cudf_polars tests"
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=0.20.30
- polars>=1.0
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
6 changes: 3 additions & 3 deletions python/cudf_polars/cudf_polars/dsl/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,15 +978,15 @@ def collect_agg(self, *, depth: int) -> AggInfo:
class Agg(Expr):
__slots__ = ("name", "options", "op", "request", "children")
_non_child = ("dtype", "name", "options")
children: tuple[Expr]
children: tuple[Expr, ...]

def __init__(
self, dtype: plc.DataType, name: str, options: Any, value: Expr
self, dtype: plc.DataType, name: str, options: Any, *children: Expr
) -> None:
super().__init__(dtype)
self.name = name
self.options = options
self.children = (value,)
self.children = children
if name not in Agg._SUPPORTED:
raise NotImplementedError(
f"Unsupported aggregation {name=}"
Expand Down
11 changes: 9 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import dataclasses
import itertools
import json
import types
from functools import cache
from typing import TYPE_CHECKING, Any, Callable, ClassVar
Expand Down Expand Up @@ -180,8 +181,10 @@ def __post_init__(self):
class Scan(IR):
"""Input from files."""

typ: Any
typ: str
"""What type of file are we reading? Parquet, CSV, etc..."""
options: tuple[Any, ...]
"""Type specific options, as json-encoded strings."""
paths: list[str]
"""List of paths to read from."""
file_options: Any
Expand Down Expand Up @@ -211,17 +214,21 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
with_columns = options.with_columns
row_index = options.row_index
if self.typ == "csv":
opts, cloud_opts = map(json.loads, self.options)
df = DataFrame.from_cudf(
cudf.concat(
[cudf.read_csv(p, usecols=with_columns) for p in self.paths]
)
)
elif self.typ == "parquet":
opts, cloud_opts = map(json.loads, self.options)
cdf = cudf.read_parquet(self.paths, columns=with_columns)
assert isinstance(cdf, cudf.DataFrame)
df = DataFrame.from_cudf(cdf)
else:
assert_never(self.typ)
raise NotImplementedError(
f"Unhandled scan type: {self.typ}"
) # pragma: no cover; post init trips first
if row_index is not None:
name, offset = row_index
dtype = self.schema[name]
Expand Down
6 changes: 4 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ def _(
def _(
node: pl_ir.Scan, visitor: NodeTraverser, schema: dict[str, plc.DataType]
) -> ir.IR:
typ, *options = node.scan_type
return ir.Scan(
schema,
node.scan_type,
typ,
tuple(options),
node.paths,
node.file_options,
translate_named_expr(visitor, n=node.predicate)
Expand Down Expand Up @@ -445,7 +447,7 @@ def _(node: pl_expr.Agg, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Ex
dtype,
node.name,
node.options,
translate_expr(visitor, n=node.arguments),
*(translate_expr(visitor, n=n) for n in node.arguments),
)


Expand Down
2 changes: 1 addition & 1 deletion python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" }
requires-python = ">=3.9"
dependencies = [
"cudf==24.8.*,>=0.0.0a0",
"polars>=0.20.30",
"polars>=1.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
"Intended Audience :: Developers",
Expand Down

0 comments on commit 08552f8

Please sign in to comment.