Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update cudf-polars for v1 release of polars #16149

Merged
merged 4 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions ci/test_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,8 @@ rapids-logger "Install cudf wheel"
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]

rapids-logger "Install polars (allow pre-release versions)"
python -m pip install 'polars>=1.0.0a0'

rapids-logger "Install cudf_polars"
python -m pip install --no-deps python/cudf_polars
python -m pip install python/cudf_polars
vyasr marked this conversation as resolved.
Show resolved Hide resolved

rapids-logger "Run cudf_polars tests"

Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=0.20.30
- polars>=1.0
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
6 changes: 3 additions & 3 deletions python/cudf_polars/cudf_polars/dsl/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,15 +978,15 @@ def collect_agg(self, *, depth: int) -> AggInfo:
class Agg(Expr):
__slots__ = ("name", "options", "op", "request", "children")
_non_child = ("dtype", "name", "options")
children: tuple[Expr]
children: tuple[Expr, ...]

def __init__(
self, dtype: plc.DataType, name: str, options: Any, value: Expr
self, dtype: plc.DataType, name: str, options: Any, *children: Expr
) -> None:
super().__init__(dtype)
self.name = name
self.options = options
self.children = (value,)
self.children = children
if name not in Agg._SUPPORTED:
raise NotImplementedError(
f"Unsupported aggregation {name=}"
Expand Down
11 changes: 9 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import dataclasses
import itertools
import json
import types
from functools import cache
from typing import TYPE_CHECKING, Any, Callable, ClassVar
Expand Down Expand Up @@ -180,8 +181,10 @@ def __post_init__(self):
class Scan(IR):
"""Input from files."""

typ: Any
typ: str
"""What type of file are we reading? Parquet, CSV, etc..."""
options: tuple[Any, ...]
"""Type specific options, as json-encoded strings."""
paths: list[str]
"""List of paths to read from."""
file_options: Any
Expand Down Expand Up @@ -211,17 +214,21 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
with_columns = options.with_columns
row_index = options.row_index
if self.typ == "csv":
opts, cloud_opts = map(json.loads, self.options)
vyasr marked this conversation as resolved.
Show resolved Hide resolved
df = DataFrame.from_cudf(
cudf.concat(
[cudf.read_csv(p, usecols=with_columns) for p in self.paths]
)
)
elif self.typ == "parquet":
opts, cloud_opts = map(json.loads, self.options)
cdf = cudf.read_parquet(self.paths, columns=with_columns)
assert isinstance(cdf, cudf.DataFrame)
df = DataFrame.from_cudf(cdf)
else:
assert_never(self.typ)
raise NotImplementedError(
f"Unhandled scan type: {self.typ}"
) # pragma: no cover; post init trips first
if row_index is not None:
name, offset = row_index
dtype = self.schema[name]
Expand Down
6 changes: 4 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ def _(
def _(
node: pl_ir.Scan, visitor: NodeTraverser, schema: dict[str, plc.DataType]
) -> ir.IR:
typ, *options = node.scan_type
return ir.Scan(
schema,
node.scan_type,
typ,
tuple(options),
node.paths,
node.file_options,
translate_named_expr(visitor, n=node.predicate)
Expand Down Expand Up @@ -445,7 +447,7 @@ def _(node: pl_expr.Agg, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Ex
dtype,
node.name,
node.options,
translate_expr(visitor, n=node.arguments),
*(translate_expr(visitor, n=n) for n in node.arguments),
)


Expand Down
2 changes: 1 addition & 1 deletion python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" }
requires-python = ">=3.9"
dependencies = [
"cudf==24.8.*,>=0.0.0a0",
"polars>=0.20.30",
"polars>=1.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
"Intended Audience :: Developers",
Expand Down
Loading