Skip to content

Commit

Permalink
feat: Adding Pinecone Vector DB option for RAG corpuses to SDK
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 675328511
  • Loading branch information
speedstorm1 authored and copybara-github committed Sep 16, 2024
1 parent a6cbb74 commit f78b953
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 6 deletions.
28 changes: 28 additions & 0 deletions tests/unit/vertex_rag/test_rag_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from vertexai.preview.rag import (
EmbeddingModelConfig,
Pinecone,
RagCorpus,
RagFile,
RagResource,
Expand Down Expand Up @@ -69,6 +70,14 @@
collection_name=TEST_WEAVIATE_COLLECTION_NAME,
api_key=TEST_WEAVIATE_API_KEY_SECRET_VERSION,
)
# Pinecone vector DB test data: the index name, the Secret Manager secret
# version used as the API key reference, and the SDK config built from both.
TEST_PINECONE_INDEX_NAME = "test-pinecone-index"
TEST_PINECONE_API_KEY_SECRET_VERSION = (
    "projects/test-project/secrets/test-secret/versions/1"
)
TEST_PINECONE_CONFIG = Pinecone(
    index_name=TEST_PINECONE_INDEX_NAME,
    api_key=TEST_PINECONE_API_KEY_SECRET_VERSION,
)
TEST_VERTEX_FEATURE_STORE_RESOURCE_NAME = "test-feature-view-resource-name"
TEST_GAPIC_RAG_CORPUS = GapicRagCorpus(
name=TEST_RAG_CORPUS_RESOURCE_NAME,
Expand Down Expand Up @@ -106,6 +115,19 @@
),
),
)
# GAPIC-level corpus configured with a Pinecone vector DB. The index name
# goes under `pinecone`, while the API key secret version goes under `api_auth`.
TEST_GAPIC_RAG_CORPUS_PINECONE = GapicRagCorpus(
    name=TEST_RAG_CORPUS_RESOURCE_NAME,
    display_name=TEST_CORPUS_DISPLAY_NAME,
    description=TEST_CORPUS_DISCRIPTION,
    rag_vector_db_config=RagVectorDbConfig(
        pinecone=RagVectorDbConfig.Pinecone(index_name=TEST_PINECONE_INDEX_NAME),
        api_auth=api_auth.ApiAuth(
            api_key_config=api_auth.ApiAuth.ApiKeyConfig(
                api_key_secret_version=TEST_PINECONE_API_KEY_SECRET_VERSION
            ),
        ),
    ),
)
TEST_EMBEDDING_MODEL_CONFIG = EmbeddingModelConfig(
publisher_model="publishers/google/models/textembedding-gecko",
)
Expand All @@ -130,6 +152,12 @@
description=TEST_CORPUS_DISCRIPTION,
vector_db=TEST_VERTEX_FEATURE_STORE_CONFIG,
)
# SDK-level RagCorpus whose vector DB is the Pinecone test config.
TEST_RAG_CORPUS_PINECONE = RagCorpus(
    name=TEST_RAG_CORPUS_RESOURCE_NAME,
    display_name=TEST_CORPUS_DISPLAY_NAME,
    description=TEST_CORPUS_DISCRIPTION,
    vector_db=TEST_PINECONE_CONFIG,
)
TEST_PAGE_TOKEN = "test-page-token"

# RagFiles
Expand Down
24 changes: 24 additions & 0 deletions tests/unit/vertex_rag/test_rag_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,21 @@ def create_rag_corpus_mock_vertex_feature_store():
yield create_rag_corpus_mock_vertex_feature_store


@pytest.fixture
def create_rag_corpus_mock_pinecone():
    """Patch ``VertexRagDataServiceClient.create_rag_corpus`` for Pinecone tests.

    Yields:
        The patched ``create_rag_corpus`` mock. Calling it returns an
        ``Operation``-spec'd mock that reports ``done()`` as ``True`` and whose
        ``result()`` is ``tc.TEST_GAPIC_RAG_CORPUS_PINECONE``.
    """
    with mock.patch.object(
        VertexRagDataServiceClient,
        "create_rag_corpus",
    ) as patched_create_rag_corpus:
        # Stand-in for the operation object returned by the client call;
        # it is already finished and resolves to the Pinecone GAPIC corpus.
        finished_operation = mock.Mock(ga_operation.Operation)
        finished_operation.done.return_value = True
        finished_operation.result.return_value = tc.TEST_GAPIC_RAG_CORPUS_PINECONE
        patched_create_rag_corpus.return_value = finished_operation
        # Yield inside the `with` so the patch stays active for the test body.
        yield patched_create_rag_corpus


@pytest.fixture
def list_rag_corpora_pager_mock():
with mock.patch.object(
Expand Down Expand Up @@ -242,6 +257,15 @@ def test_create_corpus_vertex_feature_store_success(self):

rag_corpus_eq(rag_corpus, tc.TEST_RAG_CORPUS_VERTEX_FEATURE_STORE)

@pytest.mark.usefixtures("create_rag_corpus_mock_pinecone")
def test_create_corpus_pinecone_success(self):
    """Check that creating a corpus with a Pinecone vector DB returns the expected RagCorpus."""
    created_corpus = rag.create_corpus(
        display_name=tc.TEST_CORPUS_DISPLAY_NAME,
        vector_db=tc.TEST_PINECONE_CONFIG,
    )

    # The result is compared against the Pinecone RagCorpus test constant.
    rag_corpus_eq(created_corpus, tc.TEST_RAG_CORPUS_PINECONE)

@pytest.mark.usefixtures("rag_data_client_mock_exception")
def test_create_corpus_failure(self):
with pytest.raises(RuntimeError) as e:
Expand Down
2 changes: 2 additions & 0 deletions vertexai/preview/rag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
EmbeddingModelConfig,
JiraQuery,
JiraSource,
Pinecone,
RagCorpus,
RagFile,
RagResource,
Expand All @@ -54,6 +55,7 @@
"EmbeddingModelConfig",
"JiraQuery",
"JiraSource",
"Pinecone",
"RagCorpus",
"RagFile",
"RagResource",
Expand Down
3 changes: 2 additions & 1 deletion vertexai/preview/rag/rag_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from vertexai.preview.rag.utils.resources import (
EmbeddingModelConfig,
JiraSource,
Pinecone,
RagCorpus,
RagFile,
SlackChannelsSource,
Expand All @@ -57,7 +58,7 @@ def create_corpus(
display_name: Optional[str] = None,
description: Optional[str] = None,
embedding_model_config: Optional[EmbeddingModelConfig] = None,
vector_db: Optional[Union[Weaviate, VertexFeatureStore]] = None,
vector_db: Optional[Union[Weaviate, VertexFeatureStore, Pinecone]] = None,
) -> RagCorpus:
"""Creates a new RagCorpus resource.
Expand Down
30 changes: 26 additions & 4 deletions vertexai/preview/rag/utils/_gapic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
)
from vertexai.preview.rag.utils.resources import (
EmbeddingModelConfig,
Pinecone,
RagCorpus,
RagFile,
SlackChannelsSource,
Expand Down Expand Up @@ -98,8 +99,8 @@ def convert_gapic_to_embedding_model_config(

def convert_gapic_to_vector_db(
gapic_vector_db: RagVectorDbConfig,
) -> Union[Weaviate, VertexFeatureStore]:
"""Convert Gapic RagVectorDbConfig to Weaviate or VertexFeatureStore."""
) -> Union[Weaviate, VertexFeatureStore, Pinecone]:
"""Convert Gapic RagVectorDbConfig to Weaviate, VertexFeatureStore, or Pinecone."""
if gapic_vector_db.__contains__("weaviate"):
return Weaviate(
weaviate_http_endpoint=gapic_vector_db.weaviate.http_endpoint,
Expand All @@ -110,6 +111,11 @@ def convert_gapic_to_vector_db(
return VertexFeatureStore(
resource_name=gapic_vector_db.vertex_feature_store.feature_view_resource_name,
)
elif gapic_vector_db.__contains__("pinecone"):
return Pinecone(
index_name=gapic_vector_db.pinecone.index_name,
api_key=gapic_vector_db.api_auth.api_key_config.api_key_secret_version,
)
else:
return None

Expand Down Expand Up @@ -395,7 +401,7 @@ def set_embedding_model_config(


def set_vector_db(
vector_db: Union[Weaviate, VertexFeatureStore],
vector_db: Union[Weaviate, VertexFeatureStore, Pinecone],
rag_corpus: GapicRagCorpus,
) -> None:
"""Sets the vector db configuration for the rag corpus."""
Expand Down Expand Up @@ -423,5 +429,21 @@ def set_vector_db(
feature_view_resource_name=resource_name,
),
)
elif isinstance(vector_db, Pinecone):
index_name = vector_db.index_name
api_key = vector_db.api_key

rag_corpus.rag_vector_db_config = RagVectorDbConfig(
pinecone=RagVectorDbConfig.Pinecone(
index_name=index_name,
),
api_auth=api_auth.ApiAuth(
api_key_config=api_auth.ApiAuth.ApiKeyConfig(
api_key_secret_version=api_key
),
),
)
else:
raise TypeError("vector_db must be a Weaviate or VertexFeatureStore.")
raise TypeError(
"vector_db must be a Weaviate, VertexFeatureStore, or Pinecone."
)
16 changes: 15 additions & 1 deletion vertexai/preview/rag/utils/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ class VertexFeatureStore:
resource_name: str


@dataclasses.dataclass
class Pinecone:
    """Pinecone vector database configuration.

    Attributes:
        index_name: The Pinecone index name.
        api_key: The SecretManager resource name for the Pinecone DB API token
            (a reference to the secret, not the token value itself). Format:
            ``projects/{project}/secrets/{secret}/versions/{version}``
    """

    index_name: str
    api_key: str


@dataclasses.dataclass
class RagCorpus:
"""RAG corpus(output only).
Expand All @@ -115,7 +129,7 @@ class RagCorpus:
display_name: Optional[str] = None
description: Optional[str] = None
embedding_model_config: Optional[EmbeddingModelConfig] = None
vector_db: Optional[Union[Weaviate, VertexFeatureStore]] = None
vector_db: Optional[Union[Weaviate, VertexFeatureStore, Pinecone]] = None


@dataclasses.dataclass
Expand Down

0 comments on commit f78b953

Please sign in to comment.