Skip to content

Commit

Permalink
[SPARK-45430] Fix for FramelessOffsetWindowFunction when IGNORE NULLS…
Browse files Browse the repository at this point in the history
… and offset > rowCount

### What changes were proposed in this pull request?

This fixes a failure that occurs when a window function backed by `FrameLessOffsetWindowFunctionFrame` (e.g. `lead` or `lag`) is used with `ignoreNulls = true` and `abs(offset) > rowCount`.

e.g.

```
select x, lead(x, 5) IGNORE NULLS over (order by x) from (select explode(sequence(1, 3)) x)
```

### Why are the changes needed?

To fix an existing bug: queries like the example above currently fail.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Modified an existing unit test in `DataFrameWindowFunctionsSuite` to cover this case.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #43236 from vitaliili-db/SPARK-45430.

Authored-by: Vitalii Li <vitalii.li@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
(cherry picked from commit 32e1e58)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
vitaliili-db authored and cloud-fan committed Oct 24, 2023
1 parent 06f4885 commit da08382
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,11 @@ class FrameLessOffsetWindowFunctionFrame(
override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = {
resetStates(rows)
if (ignoreNulls) {
findNextRowWithNonNullInput()
if (Math.abs(offset) > rows.length) {
fillDefaultValue(EmptyRow)
} else {
findNextRowWithNonNullInput()
}
} else {
// drain the first few rows if offset is larger than zero
while (inputIndex < offset) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -819,34 +819,38 @@ class DataFrameWindowFunctionsSuite extends QueryTest
lead($"value", 1, null, true).over(window),
lead($"value", 2, null, true).over(window),
lead($"value", 3, null, true).over(window),
// offset > rowCount: SPARK-45430
lead($"value", 100, null, true).over(window),
lead(concat($"value", $"key"), 1, null, true).over(window),
lag($"value", 1).over(window),
lag($"value", 2).over(window),
lag($"value", 0, null, true).over(window),
lag($"value", 1, null, true).over(window),
lag($"value", 2, null, true).over(window),
lag($"value", 3, null, true).over(window),
// abs(offset) > rowCount: SPARK-45430
lag($"value", -100, null, true).over(window),
lag(concat($"value", $"key"), 1, null, true).over(window))
.orderBy($"order"),
Seq(
Row("a", 0, null, "x", null, null, "x", "y", "z", "xa",
null, null, null, null, null, null, null),
Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya",
null, null, "x", null, null, null, null),
Row("b", 2, null, null, "y", null, "y", "z", "v", "ya",
"x", null, null, "x", null, null, "xa"),
Row("c", 3, null, "y", null, null, "y", "z", "v", "ya",
null, "x", null, "x", null, null, "xa"),
Row("a", 4, "y", null, "z", "y", "z", "v", null, "za",
null, null, "y", "x", null, null, "xa"),
Row("b", 5, null, "z", "v", null, "z", "v", null, "za",
"y", null, null, "y", "x", null, "ya"),
Row("a", 6, "z", "v", null, "z", "v", null, null, "va",
null, "y", "z", "y", "x", null, "ya"),
Row("a", 7, "v", null, null, "v", null, null, null, null,
"z", null, "v", "z", "y", "x", "za"),
Row("a", 8, null, null, null, null, null, null, null, null,
"v", "z", null, "v", "z", "y", "va")))
Row("a", 0, null, "x", null, null, "x", "y", "z", null, "xa",
null, null, null, null, null, null, null, null),
Row("a", 1, "x", null, null, "x", "y", "z", "v", null, "ya",
null, null, "x", null, null, null, null, null),
Row("b", 2, null, null, "y", null, "y", "z", "v", null, "ya",
"x", null, null, "x", null, null, null, "xa"),
Row("c", 3, null, "y", null, null, "y", "z", "v", null, "ya",
null, "x", null, "x", null, null, null, "xa"),
Row("a", 4, "y", null, "z", "y", "z", "v", null, null, "za",
null, null, "y", "x", null, null, null, "xa"),
Row("b", 5, null, "z", "v", null, "z", "v", null, null, "za",
"y", null, null, "y", "x", null, null, "ya"),
Row("a", 6, "z", "v", null, "z", "v", null, null, null, "va",
null, "y", "z", "y", "x", null, null, "ya"),
Row("a", 7, "v", null, null, "v", null, null, null, null, null,
"z", null, "v", "z", "y", "x", null, "za"),
Row("a", 8, null, null, null, null, null, null, null, null, null,
"v", "z", null, "v", "z", "y", null, "va")))
}

test("lag - Offset expression <offset> must be a literal") {
Expand Down

0 comments on commit da08382

Please sign in to comment.