From f78b953f561b8697d07a530e89c7e727db1161ed Mon Sep 17 00:00:00 2001 From: Ayush Agrawal Date: Mon, 16 Sep 2024 16:13:41 -0700 Subject: [PATCH] feat: Adding Pinecone Vector DB option for RAG corpuses to SDK PiperOrigin-RevId: 675328511 --- tests/unit/vertex_rag/test_rag_constants.py | 28 +++++++++++++++++++ tests/unit/vertex_rag/test_rag_data.py | 24 +++++++++++++++++ vertexai/preview/rag/__init__.py | 2 ++ vertexai/preview/rag/rag_data.py | 3 ++- vertexai/preview/rag/utils/_gapic_utils.py | 30 ++++++++++++++++++--- vertexai/preview/rag/utils/resources.py | 16 ++++++++++- 6 files changed, 97 insertions(+), 6 deletions(-) diff --git a/tests/unit/vertex_rag/test_rag_constants.py b/tests/unit/vertex_rag/test_rag_constants.py index 356dbf4ba7..79a7dc5e5f 100644 --- a/tests/unit/vertex_rag/test_rag_constants.py +++ b/tests/unit/vertex_rag/test_rag_constants.py @@ -20,6 +20,7 @@ from vertexai.preview.rag import ( EmbeddingModelConfig, + Pinecone, RagCorpus, RagFile, RagResource, @@ -69,6 +70,14 @@ collection_name=TEST_WEAVIATE_COLLECTION_NAME, api_key=TEST_WEAVIATE_API_KEY_SECRET_VERSION, ) +TEST_PINECONE_INDEX_NAME = "test-pinecone-index" +TEST_PINECONE_API_KEY_SECRET_VERSION = ( + "projects/test-project/secrets/test-secret/versions/1" +) +TEST_PINECONE_CONFIG = Pinecone( + index_name=TEST_PINECONE_INDEX_NAME, + api_key=TEST_PINECONE_API_KEY_SECRET_VERSION, +) TEST_VERTEX_FEATURE_STORE_RESOURCE_NAME = "test-feature-view-resource-name" TEST_GAPIC_RAG_CORPUS = GapicRagCorpus( name=TEST_RAG_CORPUS_RESOURCE_NAME, @@ -106,6 +115,19 @@ ), ), ) +TEST_GAPIC_RAG_CORPUS_PINECONE = GapicRagCorpus( + name=TEST_RAG_CORPUS_RESOURCE_NAME, + display_name=TEST_CORPUS_DISPLAY_NAME, + description=TEST_CORPUS_DISCRIPTION, + rag_vector_db_config=RagVectorDbConfig( + pinecone=RagVectorDbConfig.Pinecone(index_name=TEST_PINECONE_INDEX_NAME), + api_auth=api_auth.ApiAuth( + api_key_config=api_auth.ApiAuth.ApiKeyConfig( + api_key_secret_version=TEST_PINECONE_API_KEY_SECRET_VERSION + ), + ), + ), +) TEST_EMBEDDING_MODEL_CONFIG = EmbeddingModelConfig( publisher_model="publishers/google/models/textembedding-gecko", ) @@ -130,6 +152,12 @@ description=TEST_CORPUS_DISCRIPTION, vector_db=TEST_VERTEX_FEATURE_STORE_CONFIG, ) +TEST_RAG_CORPUS_PINECONE = RagCorpus( + name=TEST_RAG_CORPUS_RESOURCE_NAME, + display_name=TEST_CORPUS_DISPLAY_NAME, + description=TEST_CORPUS_DISCRIPTION, + vector_db=TEST_PINECONE_CONFIG, +) TEST_PAGE_TOKEN = "test-page-token" # RagFiles diff --git a/tests/unit/vertex_rag/test_rag_data.py b/tests/unit/vertex_rag/test_rag_data.py index 5f7866c389..89c509e249 100644 --- a/tests/unit/vertex_rag/test_rag_data.py +++ b/tests/unit/vertex_rag/test_rag_data.py @@ -79,6 +79,21 @@ def create_rag_corpus_mock_vertex_feature_store(): yield create_rag_corpus_mock_vertex_feature_store +@pytest.fixture +def create_rag_corpus_mock_pinecone(): + with mock.patch.object( + VertexRagDataServiceClient, + "create_rag_corpus", + ) as create_rag_corpus_mock_pinecone: + create_rag_corpus_lro_mock = mock.Mock(ga_operation.Operation) + create_rag_corpus_lro_mock.done.return_value = True + create_rag_corpus_lro_mock.result.return_value = ( + tc.TEST_GAPIC_RAG_CORPUS_PINECONE + ) + create_rag_corpus_mock_pinecone.return_value = create_rag_corpus_lro_mock + yield create_rag_corpus_mock_pinecone + + @pytest.fixture def list_rag_corpora_pager_mock(): with mock.patch.object( @@ -242,6 +257,15 @@ def test_create_corpus_vertex_feature_store_success(self): rag_corpus_eq(rag_corpus, tc.TEST_RAG_CORPUS_VERTEX_FEATURE_STORE) + @pytest.mark.usefixtures("create_rag_corpus_mock_pinecone") + def test_create_corpus_pinecone_success(self): + rag_corpus = rag.create_corpus( + display_name=tc.TEST_CORPUS_DISPLAY_NAME, + vector_db=tc.TEST_PINECONE_CONFIG, + ) + + rag_corpus_eq(rag_corpus, tc.TEST_RAG_CORPUS_PINECONE) + @pytest.mark.usefixtures("rag_data_client_mock_exception") def test_create_corpus_failure(self): with pytest.raises(RuntimeError) as e: diff --git a/vertexai/preview/rag/__init__.py b/vertexai/preview/rag/__init__.py index bbd7cf1f83..3ad4a782e0 100644 --- a/vertexai/preview/rag/__init__.py +++ b/vertexai/preview/rag/__init__.py @@ -40,6 +40,7 @@ EmbeddingModelConfig, JiraQuery, JiraSource, + Pinecone, RagCorpus, RagFile, RagResource, @@ -54,6 +55,7 @@ "EmbeddingModelConfig", "JiraQuery", "JiraSource", + "Pinecone", "RagCorpus", "RagFile", "RagResource", diff --git a/vertexai/preview/rag/rag_data.py b/vertexai/preview/rag/rag_data.py index ea7b730d6a..43edd2ce63 100644 --- a/vertexai/preview/rag/rag_data.py +++ b/vertexai/preview/rag/rag_data.py @@ -45,6 +45,7 @@ from vertexai.preview.rag.utils.resources import ( EmbeddingModelConfig, JiraSource, + Pinecone, RagCorpus, RagFile, SlackChannelsSource, @@ -57,7 +58,7 @@ def create_corpus( display_name: Optional[str] = None, description: Optional[str] = None, embedding_model_config: Optional[EmbeddingModelConfig] = None, - vector_db: Optional[Union[Weaviate, VertexFeatureStore]] = None, + vector_db: Optional[Union[Weaviate, VertexFeatureStore, Pinecone]] = None, ) -> RagCorpus: """Creates a new RagCorpus resource. diff --git a/vertexai/preview/rag/utils/_gapic_utils.py b/vertexai/preview/rag/utils/_gapic_utils.py index 95a4e24612..61a0f68d0b 100644 --- a/vertexai/preview/rag/utils/_gapic_utils.py +++ b/vertexai/preview/rag/utils/_gapic_utils.py @@ -38,6 +38,7 @@ ) from vertexai.preview.rag.utils.resources import ( EmbeddingModelConfig, + Pinecone, RagCorpus, RagFile, SlackChannelsSource, @@ -98,8 +99,8 @@ def convert_gapic_to_embedding_model_config( def convert_gapic_to_vector_db( gapic_vector_db: RagVectorDbConfig, -) -> Union[Weaviate, VertexFeatureStore]: - """Convert Gapic RagVectorDbConfig to Weaviate or VertexFeatureStore.""" +) -> Union[Weaviate, VertexFeatureStore, Pinecone]: + """Convert Gapic RagVectorDbConfig to Weaviate, VertexFeatureStore, or Pinecone.""" if gapic_vector_db.__contains__("weaviate"): return Weaviate( weaviate_http_endpoint=gapic_vector_db.weaviate.http_endpoint, @@ -110,6 +111,11 @@ def convert_gapic_to_vector_db( return VertexFeatureStore( resource_name=gapic_vector_db.vertex_feature_store.feature_view_resource_name, ) + elif gapic_vector_db.__contains__("pinecone"): + return Pinecone( + index_name=gapic_vector_db.pinecone.index_name, + api_key=gapic_vector_db.api_auth.api_key_config.api_key_secret_version, + ) else: return None @@ -395,7 +401,7 @@ def set_embedding_model_config( def set_vector_db( - vector_db: Union[Weaviate, VertexFeatureStore], + vector_db: Union[Weaviate, VertexFeatureStore, Pinecone], rag_corpus: GapicRagCorpus, ) -> None: """Sets the vector db configuration for the rag corpus.""" @@ -423,5 +429,21 @@ def set_vector_db( feature_view_resource_name=resource_name, ), ) + elif isinstance(vector_db, Pinecone): + index_name = vector_db.index_name + api_key = vector_db.api_key + + rag_corpus.rag_vector_db_config = RagVectorDbConfig( + pinecone=RagVectorDbConfig.Pinecone( + index_name=index_name, + ), + api_auth=api_auth.ApiAuth( + api_key_config=api_auth.ApiAuth.ApiKeyConfig( + api_key_secret_version=api_key + ), + ), + ) else: - raise TypeError("vector_db must be a Weaviate or VertexFeatureStore.") + raise TypeError( + "vector_db must be a Weaviate, VertexFeatureStore, or Pinecone." + ) diff --git a/vertexai/preview/rag/utils/resources.py b/vertexai/preview/rag/utils/resources.py index ef46f4e6a2..8ae2a184ab 100644 --- a/vertexai/preview/rag/utils/resources.py +++ b/vertexai/preview/rag/utils/resources.py @@ -98,6 +98,20 @@ class VertexFeatureStore: resource_name: str +@dataclasses.dataclass +class Pinecone: + """Pinecone. + + Attributes: + index_name: The Pinecone index name. + api_key: The SecretManager resource name for the Pinecone DB API token. Format: + ``projects/{project}/secrets/{secret}/versions/{version}`` + """ + + index_name: str + api_key: str + + @dataclasses.dataclass class RagCorpus: """RAG corpus(output only). @@ -115,7 +129,7 @@ class RagCorpus: display_name: Optional[str] = None description: Optional[str] = None embedding_model_config: Optional[EmbeddingModelConfig] = None - vector_db: Optional[Union[Weaviate, VertexFeatureStore]] = None + vector_db: Optional[Union[Weaviate, VertexFeatureStore, Pinecone]] = None @dataclasses.dataclass