Skip to content

Commit 1c404a8

Browse files
authored
fix: in PQ post processing, revert sort columns just before propagating down pipeline (#5098)
1 parent a54b815 commit 1c404a8

10 files changed

+761
-35
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ repos:
4949
hooks:
5050
- id: actionlint
5151
- repo: https://github.com/tcort/markdown-link-check
52-
rev: v3.13.6
52+
rev: v3.12.2
5353
hooks:
5454
- id: markdown-link-check
5555
name: markdown-link-check-local

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
- Sort steps in sub-pipelines no longer cause a column lookup error
1212
(@lukapeschke, #5066)
13+
- Dereferencing of sort columns when rendering SQL now done in context of main
14+
pipeline (@kgutwin, #5098)
1315

1416
**Documentation**:
1517

prqlc/prqlc/src/sql/pq/anchor.rs

+30-1
Original file line numberDiff line numberDiff line change
@@ -643,15 +643,44 @@ impl<'a> CidRedirector<'a> {
643643
ctx: &'a mut AnchorContext,
644644
) -> Vec<ColumnSort<CId>> {
645645
let cid_redirects = ctx.relation_instances[riid].cid_redirects.clone();
646+
log::debug!("redirect sorts {sorts:?} {riid:?} cid_redirects {cid_redirects:?}");
646647
let mut redirector = CidRedirector { ctx, cid_redirects };
647648

648649
fold_column_sorts(&mut redirector, sorts).unwrap()
649650
}
651+
652+
// revert sort columns back to their original pre-split columns
653+
pub fn revert_sorts(
654+
sorts: Vec<ColumnSort<CId>>,
655+
ctx: &'a mut AnchorContext,
656+
) -> Vec<ColumnSort<CId>> {
657+
sorts
658+
.into_iter()
659+
.map(|sort| {
660+
let decl = ctx.column_decls.get(&sort.column).unwrap();
661+
if let ColumnDecl::RelationColumn(riid, cid, _) = decl {
662+
let cid_redirects = &ctx.relation_instances[riid].cid_redirects;
663+
for (source, target) in cid_redirects.iter() {
664+
if target == cid {
665+
log::debug!("reverting {target:?} back to {source:?}");
666+
return ColumnSort {
667+
direction: sort.direction,
668+
column: *source,
669+
};
670+
}
671+
}
672+
}
673+
sort
674+
})
675+
.collect()
676+
}
650677
}
651678

652679
impl RqFold for CidRedirector<'_> {
653680
fn fold_cid(&mut self, cid: CId) -> Result<CId> {
654-
Ok(self.cid_redirects.get(&cid).cloned().unwrap_or(cid))
681+
let v = self.cid_redirects.get(&cid).cloned().unwrap_or(cid);
682+
log::debug!("mapping {cid:?} via {0:?} to {v:?}", self.cid_redirects);
683+
Ok(v)
655684
}
656685

657686
fn fold_transform(&mut self, transform: Transform) -> Result<Transform> {

prqlc/prqlc/src/sql/pq/postprocess.rs

+35-33
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ fn infer_sorts(query: SqlQuery, ctx: &mut Context) -> SqlQuery {
2727
let mut s = SortingInference {
2828
last_sorting: Vec::new(),
2929
ctes_sorting: HashMap::new(),
30+
main_relation: false,
3031
ctx,
3132
};
3233

@@ -36,12 +37,12 @@ fn infer_sorts(query: SqlQuery, ctx: &mut Context) -> SqlQuery {
3637
struct SortingInference<'a> {
3738
last_sorting: Sorting,
3839
ctes_sorting: HashMap<TId, CteSorting>,
40+
main_relation: bool,
3941
ctx: &'a mut Context,
4042
}
4143

4244
struct CteSorting {
4345
sorting: Sorting,
44-
has_been_used: bool,
4546
}
4647

4748
impl RqFold for SortingInference<'_> {}
@@ -50,51 +51,29 @@ impl PqFold for SortingInference<'_> {
5051
fn fold_sql_query(&mut self, query: SqlQuery) -> Result<SqlQuery> {
5152
let mut ctes = Vec::with_capacity(query.ctes.len());
5253
for cte in query.ctes {
54+
log::debug!("infer_sorts: {0:?}", cte.tid);
5355
let cte = self.fold_cte(cte)?;
5456

5557
// store sorting to be used later in From references
5658
let sorting = self.last_sorting.drain(..).collect();
57-
let sorting = CteSorting {
58-
sorting,
59-
has_been_used: false,
60-
};
59+
log::debug!("--- sorting {sorting:?}");
60+
let sorting = CteSorting { sorting };
6161
self.ctes_sorting.insert(cte.tid, sorting);
6262

6363
ctes.push(cte);
6464
}
6565

66-
// fold main_relation using a made-up tid
66+
// fold main_relation
67+
log::debug!("infer_sorts: main relation");
68+
self.main_relation = true;
6769
let mut main_relation = self.fold_sql_relation(query.main_relation)?;
70+
log::debug!("--== last_sorting {0:?}", self.last_sorting);
6871

6972
// push a sort at the back of the main pipeline
7073
if let SqlRelation::AtomicPipeline(pipeline) = &mut main_relation {
7174
pipeline.push(SqlTransform::Sort(self.last_sorting.drain(..).collect()));
7275
}
7376

74-
// make sure that all CTEs whose sorting was used actually SELECT it
75-
for cte in &mut ctes {
76-
let sorting = self.ctes_sorting.get(&cte.tid).unwrap();
77-
if !sorting.has_been_used {
78-
continue;
79-
}
80-
81-
let CteKind::Normal(sql_relation) = &mut cte.kind else {
82-
continue;
83-
};
84-
let Some(pipeline) = sql_relation.as_atomic_pipeline_mut() else {
85-
continue;
86-
};
87-
let select = pipeline.iter_mut().find_map(|x| x.as_select_mut()).unwrap();
88-
89-
for column_sort in &sorting.sorting {
90-
let cid = column_sort.column;
91-
let is_selected = select.contains(&cid);
92-
if !is_selected {
93-
select.push(cid);
94-
}
95-
}
96-
}
97-
9877
Ok(SqlQuery {
9978
ctes,
10079
main_relation,
@@ -116,6 +95,7 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
11695
transforms: Vec<SqlTransform<RelationExpr, ()>>,
11796
) -> Result<Vec<SqlTransform<RelationExpr, ()>>> {
11897
let mut sorting = Vec::new();
98+
let mut has_sort_transform = false;
11999

120100
let mut result = Vec::with_capacity(transforms.len() + 1);
121101

@@ -126,7 +106,6 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
126106
RelationExprKind::Ref(ref tid) => {
127107
// infer sorting from referenced pipeline
128108
if let Some(cte_sorting) = self.ctes_sorting.get_mut(tid) {
129-
cte_sorting.has_been_used = true;
130109
sorting.clone_from(&cte_sorting.sorting);
131110
} else {
132111
sorting = Vec::new();
@@ -147,8 +126,9 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
147126
}
148127

149128
// just store sorting and don't emit Sort
150-
SqlTransform::Sort(s) => {
151-
sorting.clone_from(&s);
129+
SqlTransform::Sort(expr) => {
130+
sorting.clone_from(&expr);
131+
has_sort_transform = true;
152132
continue;
153133
}
154134

@@ -166,6 +146,28 @@ impl PqMapper<RelationExpr, RelationExpr, (), ()> for SortingInference<'_> {
166146
result.push(transform)
167147
}
168148

149+
if !self.main_relation {
150+
// if this is a CTE, make sure that its SELECT includes the
151+
// columns from the sort
152+
let select = result.iter_mut().find_map(|x| x.as_select_mut()).unwrap();
153+
for column_sort in &sorting {
154+
let cid = column_sort.column;
155+
let is_selected = select.contains(&cid);
156+
if !is_selected {
157+
log::debug!("adding {cid:?} to {select:?}");
158+
select.push(cid);
159+
}
160+
}
161+
162+
if has_sort_transform {
163+
// now revert the sort columns so that the output
164+
// sorting reflects the input column cids, needed to
165+
// ensure proper column reference lookup in the final
166+
// steps
167+
sorting = CidRedirector::revert_sorts(sorting, &mut self.ctx.anchor);
168+
}
169+
}
170+
169171
// remember sorting for this pipeline
170172
self.last_sorting = sorting;
171173

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from albums
2+
select { AA=album_id, artist_id }
3+
sort AA
4+
filter AA >= 25
5+
join artists (==artist_id)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
2+
source: prqlc/prqlc/tests/integration/queries.rs
3+
expression: "from albums\nselect { AA=album_id, artist_id }\nsort AA\nfilter AA >= 25\njoin artists (==artist_id)\n"
4+
input_file: prqlc/prqlc/tests/integration/queries/sort_2.prql
5+
---
6+
WITH table_1 AS (
7+
SELECT
8+
album_id AS "AA",
9+
artist_id
10+
FROM
11+
albums
12+
),
13+
table_0 AS (
14+
SELECT
15+
"AA",
16+
artist_id
17+
FROM
18+
table_1
19+
WHERE
20+
"AA" >= 25
21+
)
22+
SELECT
23+
table_0."AA",
24+
table_0.artist_id,
25+
artists.*
26+
FROM
27+
table_0
28+
JOIN artists ON table_0.artist_id = artists.artist_id
29+
ORDER BY
30+
table_0."AA"

0 commit comments

Comments
 (0)