Skip to content

Commit

Permalink
feat: allow superusers to create vectorizers on any table
Browse files Browse the repository at this point in the history
  • Loading branch information
jgpruitt committed Dec 10, 2024
1 parent 35e2fc8 commit 027b3f4
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 3 deletions.
16 changes: 13 additions & 3 deletions projects/extension/sql/idempotent/013-vectorizer-api.sql
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,30 @@ begin
end if;

-- get source table name and schema name
select k.relname, n.nspname, pg_catalog.pg_has_role(pg_catalog.current_user(), k.relowner, 'MEMBER')
select
k.relname
, n.nspname
, pg_catalog.pg_has_role(pg_catalog.current_user(), k.relowner, 'MEMBER')
into strict _source_table, _source_schema, _is_owner
from pg_catalog.pg_class k
inner join pg_catalog.pg_namespace n on (k.relnamespace operator(pg_catalog.=) n.oid)
where k.oid operator(pg_catalog.=) source
;
-- not an owner of the table, but superuser?
if not _is_owner then
select r.rolsuper into strict _is_owner
from pg_catalog.pg_roles r
where r.rolname operator(pg_catalog.=) pg_catalog.current_user()
;
end if;

if not _is_owner then
raise exception 'only the owner of the source table may create a vectorizer on it';
raise exception 'only a superuser or the owner of the source table may create a vectorizer on it';
end if;

select (embedding operator(pg_catalog.->) 'dimensions')::int into _dimensions;
if _dimensions is null then
raise exception '_dimensions argument is required';
raise exception 'dimensions argument is required';
end if;

-- get the source table's primary key definition
Expand Down
101 changes: 101 additions & 0 deletions projects/extension/tests/vectorizer/test_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1662,3 +1662,104 @@ def test_queue_pending():
# a non-exact count should yield 9223372036854775807
cur.execute("select ai.vectorizer_queue_pending(%s)", (vectorizer_id,))
assert cur.fetchone()[0] == 9223372036854775807


def create_user(cur: psycopg.Cursor, user: str) -> None:
    """Create the database role *user* unless a role with that name exists.

    Args:
        cur: Open cursor used to run both the existence check and the
            ``create user`` statement.
        user: Name of the role to create.
    """
    # Function-scope import so the helper does not depend on the file's
    # top-level import block.
    from psycopg import sql

    cur.execute(
        """
        select count(*) > 0
        from pg_catalog.pg_roles
        where rolname = %s
        """,
        (user,),
    )
    if not cur.fetchone()[0]:
        # A role name cannot be sent as a bound parameter, so the statement
        # is composed with a quoted identifier. Quoting keeps the created
        # role name identical to the exact-match lookup above (an unquoted
        # name would be case-folded by the server) and prevents the name
        # from being interpreted as SQL.
        cur.execute(sql.SQL("create user {}").format(sql.Identifier(user)))


def test_create_vectorizer_privs():
    """Exercise the ownership check performed by ``ai.create_vectorizer``.

    The table ``priv_test`` is created by role ``jimmy``. Creating a
    vectorizer on it is then attempted as ``greg`` and ``test`` (both
    expected to be rejected), and as ``jimmy`` and ``postgres`` (both
    expected to succeed).
    """
    denied_pattern = ".*only a superuser or the owner of the source table may create a vectorizer on it"
    # statement shared by every role that uses the default destination
    create_vectorizer_sql = """
        select ai.create_vectorizer
        ( 'priv_test'::regclass
        , embedding=>ai.embedding_openai('text-embedding-3-small', 3)
        , chunking=>ai.chunking_character_text_splitter('foo')
        , scheduling=>ai.scheduling_none()
        , indexing=>ai.indexing_none()
        , grant_to=>null
        );
    """

    # set up the two unprivileged roles while connected as postgres
    with psycopg.connect(db_url("postgres")) as con, con.cursor() as cur:
        create_user(cur, "jimmy")
        cur.execute("grant create on schema public to jimmy")
        cur.execute("select ai.grant_ai_usage('jimmy', admin=>false)")
        create_user(cur, "greg")
        cur.execute("select ai.grant_ai_usage('greg', admin=>false)")

    # jimmy creates, and therefore owns, the source table
    with psycopg.connect(db_url("jimmy")) as con, con.cursor() as cur:
        cur.execute("""
            create table priv_test
            ( id int not null primary key generated always as identity
            , foo text
            , bar text
            )
        """)

    # greg: does not own the table, does not own the database, not superuser
    # test: owns the database, but not the table, and is not superuser
    # neither of them may create the vectorizer
    for rejected_role in ("greg", "test"):
        with psycopg.connect(db_url(rejected_role)) as con, con.cursor() as cur:
            with pytest.raises(psycopg.errors.RaiseException, match=denied_pattern):
                cur.execute(create_vectorizer_sql)

    # the table owner is allowed
    with psycopg.connect(db_url("jimmy")) as con, con.cursor() as cur:
        cur.execute(create_vectorizer_sql)

    # the superuser is allowed as well; an explicit destination is passed
    with psycopg.connect(db_url("postgres")) as con, con.cursor() as cur:
        cur.execute("""
            select ai.create_vectorizer
            ( 'priv_test'::regclass
            , destination=>'red_balloon'
            , embedding=>ai.embedding_openai('text-embedding-3-small', 3)
            , chunking=>ai.chunking_character_text_splitter('foo')
            , scheduling=>ai.scheduling_none()
            , indexing=>ai.indexing_none()
            , grant_to=>null
            );
        """)

0 comments on commit 027b3f4

Please sign in to comment.