Skip to content

Commit

Permalink
Support approx_count_distinct (#499)
Browse files Browse the repository at this point in the history
Add support for `approx_count_distinct()`
  • Loading branch information
dpxcc authored Jan 2, 2025
1 parent 3b34bee commit 6941855
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 1 deletion.
16 changes: 16 additions & 0 deletions sql/pg_duckdb--0.2.0--0.3.0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- State-transition stub used as the sfunc of the approx_count_distinct
-- aggregate. Arguments: the running bigint state and the next input value.
-- It performs no computation: every invocation raises an exception stating
-- that the aggregate only works with DuckDB execution.
CREATE FUNCTION @extschema@.approx_count_distinct_sfunc(bigint, anyelement)
    RETURNS bigint
    LANGUAGE plpgsql
    -- Pin the lookup path for the duration of the call.
    SET search_path = pg_catalog, pg_temp
AS $func$
BEGIN
    RAISE EXCEPTION 'Aggregate `approx_count_distinct(ANYELEMENT)` only works with Duckdb execution.';
END;
$func$;

-- Aggregate approx_count_distinct(anyelement).
-- The state is a bigint seeded with 0 and advanced by
-- approx_count_distinct_sfunc; no final function is declared, so the
-- aggregate's result type is the state type (bigint).
CREATE AGGREGATE @extschema@.approx_count_distinct(anyelement) (
    SFUNC    = @extschema@.approx_count_distinct_sfunc,
    STYPE    = bigint,
    INITCOND = 0
);
2 changes: 1 addition & 1 deletion src/pgduckdb_metadata_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ BuildDuckdbOnlyFunctions() {
* caching its OID as a DuckDB-only function.
*/
const char *function_names[] = {"read_parquet", "read_csv", "iceberg_scan", "iceberg_metadata",
"iceberg_snapshots", "delta_scan", "read_json"};
"iceberg_snapshots", "delta_scan", "read_json", "approx_count_distinct"};

for (uint32_t i = 0; i < lengthof(function_names); i++) {
CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(function_names[i]));
Expand Down
33 changes: 33 additions & 0 deletions test/regression/expected/approx_count_distinct.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
CREATE TABLE t (a int, b text);
INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e');
INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h');
SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t;
approx_count_distinct | approx_count_distinct
-----------------------+-----------------------
5 | 9
(1 row)

SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a;
a | approx_count_distinct
---+-----------------------
1 | 1
2 | 2
3 | 2
4 | 2
5 | 1
(5 rows)

SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a;
a | approx_count_distinct
---+-----------------------
1 | 1
2 | 2
2 | 2
3 | 2
3 | 2
4 | 2
4 | 2
5 | 1
(8 rows)

DROP TABLE t;
1 change: 1 addition & 0 deletions test/regression/expected/transactions.out
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,4 @@ FETCH PRIOR FROM c;

COMMIT;
DROP FUNCTION f, f2;
DROP TABLE t;
1 change: 1 addition & 0 deletions test/regression/schedule
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ test: transaction_errors
test: secrets
test: prepare
test: function
test: approx_count_distinct
7 changes: 7 additions & 0 deletions test/regression/sql/approx_count_distinct.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CREATE TABLE t (a int, b text);
INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e');
INSERT INTO t VALUES (2, 'f'), (3, 'g'), (4, 'h');
SELECT approx_count_distinct(a), approx_count_distinct(b) FROM t;
SELECT a, approx_count_distinct(b) FROM t GROUP BY a ORDER BY a;
SELECT a, approx_count_distinct(b) OVER (PARTITION BY a) FROM t ORDER BY a;
DROP TABLE t;
1 change: 1 addition & 0 deletions test/regression/sql/transactions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,4 @@ FETCH PRIOR FROM c;
COMMIT;

DROP FUNCTION f, f2;
DROP TABLE t;

0 comments on commit 6941855

Please sign in to comment.