Skip to content

Commit

Permalink
Fixed multivalues on Categorize and added tests using other fields
Browse files Browse the repository at this point in the history
  • Loading branch information
ivancea committed Nov 27, 2024
1 parent 74aeae3 commit 3771532
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,11 @@ public IntBlock eval(int positionCount, BytesRefBlock vBlock) {
continue;
}
int end = first + count;
result.beginPositionEntry();
for (int i = first; i < end; i++) {
result.appendInt(process(vBlock.getBytesRef(i, vScratch)));
}
result.endPositionEntry();
}
return result.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,28 @@ mv
required_capability: categorize_v2

FROM mv_sample_data
| STATS COUNT() BY category=CATEGORIZE(message)
| STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message)
| SORT category
;

COUNT():long | category:keyword
7 | .*?Banana.*?
3 | .*?Connected.+?to.*?
3 | .*?Connection.+?error.*?
1 | .*?Disconnected.*?
COUNT():long | SUM(event_duration):long | category:keyword
7 | 23231327 | .*?Banana.*?
3 | 7971589 | .*?Connected.+?to.*?
3 | 14027356 | .*?Connection.+?error.*?
1 | 1232382 | .*?Disconnected.*?
;

row mv
required_capability: categorize_v2

ROW message = ["connected to a", "connected to b", "disconnected"]
| STATS COUNT() BY category=CATEGORIZE(message)
ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"]
| STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message)
| SORT category
;

COUNT():long | category:keyword
2 | .*?connected.+?to.*?
1 | .*?disconnected.*?
COUNT():long | VALUES(str):keyword | category:keyword
2 | [a, b, c] | .*?connected.+?to.*?
1 | [a, b, c] | .*?disconnected.*?
;

with multiple indices
Expand Down Expand Up @@ -92,6 +92,22 @@ COUNT():long | category:keyword
10 | .*?Head.+?Human.+?Resources.*?
;

# Currently ignored (note the "-Ignore" suffix on the test name): running it throws when
# calling AbstractCategorizeBlockHash.seenGroupIds() - possibly requires nulls support (unconfirmed).
#
# Sums `languages` per category of `job_positions` (presumably a multivalued field, per the
# test name) and keeps the first three categories in ascending order, which is the order of
# the expected rows below.
# NOTE(review): the expected sums could not be validated while the test is ignored - confirm
# them when the test is enabled.
mv with many values-Ignore
required_capability: categorize_v2

FROM employees
| STATS SUM(languages) BY category=CATEGORIZE(job_positions)
| SORT category
| LIMIT 3
;

SUM(languages):long | category:keyword
43 | .*?Accountant.*?
46 | .*?Architect.*?
35 | .*?Business.+?Analyst.*?
;

mv via eval
required_capability: categorize_v2

Expand Down

0 comments on commit 3771532

Please sign in to comment.