Skip to content

Commit

Permalink
feat: Support Struct field selection in the SQL engine
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jun 21, 2024
1 parent 8a6bf4b commit 3bb8050
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 36 deletions.
2 changes: 1 addition & 1 deletion crates/polars-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ description = "SQL transpiler for Polars. Converts SQL to Polars logical plans"
arrow = { workspace = true }
polars-core = { workspace = true, features = ["rows"] }
polars-error = { workspace = true }
polars-lazy = { workspace = true, features = ["abs", "binary_encoding", "concat_str", "cross_join", "cum_agg", "dtype-date", "dtype-decimal", "is_in", "list_eval", "log", "meta", "regex", "round_series", "sign", "string_reverse", "strings", "timezones", "trigonometry"] }
polars-lazy = { workspace = true, features = ["abs", "binary_encoding", "concat_str", "cross_join", "cum_agg", "dtype-date", "dtype-decimal", "dtype-struct", "is_in", "list_eval", "log", "meta", "regex", "round_series", "sign", "string_reverse", "strings", "timezones", "trigonometry"] }
polars-ops = { workspace = true }
polars-plan = { workspace = true }
polars-time = { workspace = true }
Expand Down
12 changes: 10 additions & 2 deletions crates/polars-sql/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1179,9 +1179,17 @@ impl SQLContext {
options: &WildcardAdditionalOptions,
contains_wildcard_exclude: &mut bool,
) -> PolarsResult<Expr> {
if options.opt_except.is_some() {
polars_bail!(SQLSyntax: "EXCEPT not supported (use EXCLUDE instead)")
// bail on unsupported wildcard options
if options.opt_ilike.is_some() {
polars_bail!(SQLSyntax: "ILIKE wildcard option is unsupported")
} else if options.opt_rename.is_some() {
polars_bail!(SQLSyntax: "RENAME wildcard option is unsupported")
} else if options.opt_replace.is_some() {
polars_bail!(SQLSyntax: "REPLACE wildcard option is unsupported")
} else if options.opt_except.is_some() {
polars_bail!(SQLSyntax: "EXCEPT wildcard option is unsupported (use EXCLUDE instead)")
}

Ok(match &options.opt_exclude {
Some(ExcludeSelectItem::Single(ident)) => {
*contains_wildcard_exclude = true;
Expand Down
74 changes: 41 additions & 33 deletions crates/polars-sql/src/sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,42 +375,50 @@ impl SQLExprVisitor<'_> {

/// Visit a compound SQL identifier
///
/// e.g. df.column or "df"."column"
/// e.g. tbl.column, struct.field, tbl.struct.field (inc. nested struct fields)
fn visit_compound_identifier(&mut self, idents: &[Ident]) -> PolarsResult<Expr> {
match idents {
[tbl_name, column_name] => {
let mut lf = self
.ctx
.get_table_from_current_scope(&tbl_name.value)
.ok_or_else(|| {
polars_err!(
SQLInterface: "no table or alias named '{}' found",
tbl_name
)
})?;

let schema =
lf.schema_with_arenas(&mut self.ctx.lp_arena, &mut self.ctx.expr_arena)?;
if let Some((_, name, _)) = schema.get_full(&column_name.value) {
let resolved = &self.ctx.resolve_name(&tbl_name.value, &column_name.value);
Ok(if name != resolved {
col(resolved).alias(name)
} else {
col(name)
})
// inference priority: table > struct > column
let ident_root = &idents[0];
let mut remaining_idents = idents.iter().skip(1);
let mut lf = self.ctx.get_table_from_current_scope(&ident_root.value);

let schema = if let Some(ref mut lf) = lf {
lf.schema_with_arenas(&mut self.ctx.lp_arena, &mut self.ctx.expr_arena)
} else {
Ok(Arc::new(if let Some(active_schema) = self.active_schema {
active_schema.clone()
} else {
Schema::new()
}))
};

let mut column: PolarsResult<Expr> = if lf.is_none() {
Ok(col(&ident_root.value))
} else {
let col_name = &remaining_idents.next().unwrap().value;
if let Some((_, name, _)) = schema?.get_full(col_name) {
let resolved = &self.ctx.resolve_name(&ident_root.value, col_name);
Ok(if name != resolved {
col(resolved).alias(name)
} else {
polars_bail!(
SQLInterface: "no column named '{}' found in table '{}'",
column_name,
tbl_name
)
}
},
_ => polars_bail!(
SQLInterface: "invalid identifier {:?}",
idents
),
col(name)
})
} else {
polars_bail!(
SQLInterface: "no column named '{}' found in table '{}'",
col_name,
ident_root
)
}
};
// additional ident levels index into struct fields
for ident in remaining_idents {
column = Ok(column
.unwrap()
.struct_()
.field_by_name(ident.value.as_str()));
}
column
}

fn visit_interval(&self, interval: &Interval) -> PolarsResult<Expr> {
Expand Down
60 changes: 60 additions & 0 deletions py-polars/tests/unit/sql/test_structs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations

import pytest

import polars as pl
from polars.exceptions import StructFieldNotFoundError
from polars.testing import assert_frame_equal


@pytest.fixture()
def struct_df() -> pl.DataFrame:
return pl.DataFrame(
{
"id": [100, 200, 300, 400],
"name": ["Alice", "Bob", "David", "Zoe"],
"age": [32, 27, 19, 45],
"other": [{"n": 1.5}, {"n": None}, {"n": -0.5}, {"n": 2.0}],
}
).select(pl.struct(pl.all()).alias("json_msg"))


def test_struct_field_selection(struct_df: pl.DataFrame) -> None:
res = struct_df.sql(
"""
SELECT
-- validate table alias resolution
frame.json_msg.id AS ID,
self.json_msg.name AS NAME,
json_msg.age AS AGE
FROM
self AS frame
WHERE
json_msg.age > 20 AND
json_msg.other.n IS NOT NULL -- note: nested struct field
ORDER BY
json_msg.name DESC
"""
)

expected = pl.DataFrame(
{
"ID": [400, 100],
"NAME": ["Zoe", "Alice"],
"AGE": [45, 32],
}
)
assert_frame_equal(expected, res)


@pytest.mark.parametrize(
"invalid_column",
[
"json_msg.invalid_column",
"json_msg.other.invalid_column",
"self.json_msg.other.invalid_column",
],
)
def test_struct_indexing_errors(invalid_column: str, struct_df: pl.DataFrame) -> None:
with pytest.raises(StructFieldNotFoundError, match="invalid_column"):
struct_df.sql(f"SELECT {invalid_column} FROM self")

0 comments on commit 3bb8050

Please sign in to comment.