From f3a92f55d465de1947cb2f004b53fcee6f3e18fc Mon Sep 17 00:00:00 2001 From: Brett England Date: Thu, 14 Mar 2024 20:14:17 -0400 Subject: [PATCH 1/3] Unify pgvector and postgres connection settings --- .gitignore | 2 ++ fern/docs/pages/manual/vectordb.mdx | 34 ++++++++++++++----- .../vector_store/vector_store_component.py | 10 +++--- private_gpt/settings/settings.py | 18 +++------- settings-ollama-pg.yaml | 11 +----- settings.yaml | 11 +----- 6 files changed, 41 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index 847a30db3..3973991b9 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,8 @@ __pycache__/ .vscode/ /.run/ .fleet/ +*~ +*brett* # macOS .DS_Store diff --git a/fern/docs/pages/manual/vectordb.mdx b/fern/docs/pages/manual/vectordb.mdx index db28c1fd9..dc579f684 100644 --- a/fern/docs/pages/manual/vectordb.mdx +++ b/fern/docs/pages/manual/vectordb.mdx @@ -1,7 +1,7 @@ ## Vectorstores PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) and [PGVector](https://github.com/pgvector/pgvector) as vectorstore providers. Qdrant being the default. -In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `pgvector`. +In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `postgres`. ```yaml vectorstore: @@ -50,14 +50,15 @@ poetry install --extras chroma By default `chroma` will use a disk-based database stored in local_data_path / "chroma_db" (being local_data_path defined in settings.yaml) ### PGVector +To use the PGVector store a [postgreSQL](https://www.postgresql.org/) database with the PGVector extension must be used. -To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `pgvector` and install the `vector-stores-postgres` extra. 
+To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `postgres` and install the `vector-stores-postgres` extra. ```bash poetry install --extras vector-stores-postgres ``` -PGVector settings can be configured by setting values to the `pgvector` property in the `settings.yaml` file. +PGVector settings can be configured by setting values to the `postgres` property in the `settings.yaml` file. The available configuration options are: | Field | Description | @@ -67,19 +68,36 @@ The available configuration options are: | **database** | The specific database to connect to. Default is `postgres` | | **user** | The username for database access. Default is `postgres` | | **password** | The password for database access. (Required) | -| **embed_dim** | The dimensionality of the embedding model (Required) | | **schema_name** | The database schema to use. Default is `private_gpt` | -| **table_name** | The database table to use. Default is `embeddings` | For example: ```yaml -pgvector: +vectorstore: + database: postgres + +postgres: host: localhost port: 5432 database: postgres user: postgres password: - embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5 schema_name: private_gpt - table_name: embeddings ``` + +The following table will be created in the database: +``` +postgres=# \d private_gpt.data_embeddings + Table "private_gpt.data_embeddings" + Column | Type | Collation | Nullable | Default +-----------+-------------------+-----------+----------+--------------------------------------------------------- + id | bigint | | not null | nextval('private_gpt.data_embeddings_id_seq'::regclass) + text | character varying | | not null | + metadata_ | json | | | + node_id | character varying | | | + embedding | vector(768) | | | +Indexes: + "data_embeddings_pkey" PRIMARY KEY, btree (id) + +postgres=# +``` +The dimension of the `embedding` column is set from the `embedding.embed_dim` value. 
If the embedding model changes this table may need to be dropped and recreated to avoid a dimension mismatch. diff --git a/private_gpt/components/vector_store/vector_store_component.py b/private_gpt/components/vector_store/vector_store_component.py index 5641f25e7..f9932b574 100644 --- a/private_gpt/components/vector_store/vector_store_component.py +++ b/private_gpt/components/vector_store/vector_store_component.py @@ -38,7 +38,7 @@ class VectorStoreComponent: def __init__(self, settings: Settings) -> None: self.settings = settings match settings.vectorstore.database: - case "pgvector": + case "postgres": try: from llama_index.vector_stores.postgres import ( # type: ignore PGVectorStore, @@ -48,15 +48,17 @@ def __init__(self, settings: Settings) -> None: "Postgres dependencies not found, install with `poetry install --extras vector-stores-postgres`" ) from e - if settings.pgvector is None: + if settings.postgres is None: raise ValueError( - "PGVectorStore settings not found. Please provide settings." + "Postgres settings not found. Please provide settings." ) self.vector_store = typing.cast( VectorStore, PGVectorStore.from_params( - **settings.pgvector.model_dump(exclude_none=True) + **settings.postgres.model_dump(exclude_none=True), + table_name="embeddings", + embed_dim=settings.embedding.embed_dim, ), ) diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 3fe675016..868a3cde4 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -105,7 +105,7 @@ class LLMSettings(BaseModel): class VectorstoreSettings(BaseModel): - database: Literal["chroma", "qdrant", "pgvector"] + database: Literal["chroma", "qdrant", "postgres"] class NodeStoreSettings(BaseModel): @@ -177,6 +177,10 @@ class EmbeddingSettings(BaseModel): "Do not set it higher than your number of threads of your CPU." 
), ) + embed_dim: int = Field( + 384, + description="The dimension of the embeddings stored in the Postgres database", + ) class SagemakerSettings(BaseModel): @@ -280,17 +284,6 @@ class PostgresSettings(BaseModel): ) -class PGVectorSettings(PostgresSettings): - embed_dim: int = Field( - 384, - description="The dimension of the embeddings stored in the Postgres database", - ) - table_name: str = Field( - "embeddings", - description="The name of the table in the Postgres database where the embeddings are stored", - ) - - class QdrantSettings(BaseModel): location: str | None = Field( None, @@ -360,7 +353,6 @@ class Settings(BaseModel): nodestore: NodeStoreSettings qdrant: QdrantSettings | None = None postgres: PostgresSettings | None = None - pgvector: PGVectorSettings | None = None """ diff --git a/settings-ollama-pg.yaml b/settings-ollama-pg.yaml index 2bef97d37..09f852dca 100644 --- a/settings-ollama-pg.yaml +++ b/settings-ollama-pg.yaml @@ -11,6 +11,7 @@ llm: embedding: mode: ollama + embed_dim: 768 ollama: llm_model: mistral @@ -23,16 +24,6 @@ nodestore: vectorstore: database: pgvector -pgvector: - host: localhost - port: 5432 - database: postgres - user: postgres - password: admin - embed_dim: 768 - schema_name: private_gpt - table_name: embeddings - postgres: host: localhost port: 5432 diff --git a/settings.yaml b/settings.yaml index ab04843fb..862844a19 100644 --- a/settings.yaml +++ b/settings.yaml @@ -55,6 +55,7 @@ embedding: # Should be matching the value above in most cases mode: huggingface ingest_mode: simple + embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5 huggingface: embedding_hf_model_name: BAAI/bge-small-en-v1.5 @@ -68,16 +69,6 @@ nodestore: qdrant: path: local_data/private_gpt/qdrant -pgvector: - host: localhost - port: 5432 - database: postgres - user: postgres - password: postgres - embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5 - schema_name: private_gpt - table_name: embeddings - postgres: host: localhost port: 5432 From 
2e5fcba0a25ef045728c60f207e1a4cf09da1e33 Mon Sep 17 00:00:00 2001 From: Brett England Date: Thu, 14 Mar 2024 20:22:25 -0400 Subject: [PATCH 2/3] Remove local changes --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 3973991b9..847a30db3 100644 --- a/.gitignore +++ b/.gitignore @@ -26,8 +26,6 @@ __pycache__/ .vscode/ /.run/ .fleet/ -*~ -*brett* # macOS .DS_Store From ae1fed5c39138f65d60724a4826340e5053374b3 Mon Sep 17 00:00:00 2001 From: Brett England Date: Thu, 14 Mar 2024 20:25:30 -0400 Subject: [PATCH 3/3] Update file pgvector->postgres --- settings-ollama-pg.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings-ollama-pg.yaml b/settings-ollama-pg.yaml index 09f852dca..b97982459 100644 --- a/settings-ollama-pg.yaml +++ b/settings-ollama-pg.yaml @@ -22,7 +22,7 @@ nodestore: database: postgres vectorstore: - database: pgvector + database: postgres postgres: host: localhost