Replies: 1 comment 2 replies
-
Hey @Alok1191! I'm here to assist you with any bugs, questions, or contributions. Let's tackle this issue together! To instantiate a retriever for a vector database containing multiple collections in LangChain, you can modify your setup to handle multiple collections. Here is an example of how you can achieve this:
import os
from pathlib import Path

from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
def get_vectorstore(collection_name: str):
    """Build the Chroma vector store for a single named collection.

    The store persists to a ``chroma_db_<collection_name>`` directory that
    sits next to the directory containing this file, and embeds text with
    ``OpenAIEmbeddings``.

    Args:
        collection_name: Name of the Chroma collection; also used as the
            suffix of the persistence directory.

    Returns:
        A ``Chroma`` instance bound to ``collection_name``.
    """
    base_dir = Path(__file__).parent.parent
    persist_dir = base_dir / f"chroma_db_{collection_name}"
    return Chroma(
        collection_name=collection_name,
        persist_directory=str(persist_dir),
        embedding_function=OpenAIEmbeddings(),
    )
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain_community.storage import RedisStore
def get_multi_vector_retriever(docstore_id_key: str, collection_names: list):
    """Create a retriever that searches every named collection.

    ``MultiVectorRetriever`` wraps exactly one vector store (its field is
    ``vectorstore``; it has no ``vectorstores`` argument), so one retriever is
    built per collection and their results are fused with
    ``EnsembleRetriever`` using equal weights.

    Args:
        docstore_id_key: Passed as ``id_key`` to every
            ``MultiVectorRetriever``.
        collection_names: Names of the collections to search; each is passed
            to ``get_vectorstore``.

    Returns:
        The lone ``MultiVectorRetriever`` when a single collection is given,
        otherwise an ``EnsembleRetriever`` over one ``MultiVectorRetriever``
        per collection.

    Raises:
        ValueError: If ``collection_names`` is empty.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from langchain.retrievers import EnsembleRetriever

    if not collection_names:
        raise ValueError("collection_names must contain at least one collection")

    # All per-collection retrievers share the same byte store.
    store = get_docstore()
    retrievers = [
        MultiVectorRetriever(
            vectorstore=get_vectorstore(name),
            byte_store=store,
            id_key=docstore_id_key,
        )
        for name in collection_names
    ]

    if len(retrievers) == 1:
        return retrievers[0]

    # Equal weights: no collection is preferred over another.
    equal_weight = 1 / len(retrievers)
    return EnsembleRetriever(
        retrievers=retrievers,
        weights=[equal_weight] * len(retrievers),
    )
def get_docstore():
    """Build the Redis-backed store used for this example.

    The connection URL comes from ``format_redis_conn_from_env`` and all keys
    live under the ``multi_vector_retriever_metadata`` namespace.

    Returns:
        A ``RedisStore`` instance.
    """
    connection_url = format_redis_conn_from_env()
    return RedisStore(
        redis_url=connection_url,
        namespace="multi_vector_retriever_metadata",
    )
def format_redis_conn_from_env() -> str:
    """Assemble the Redis connection URL from environment variables.

    A non-empty ``REDIS_URL`` is returned as-is. Otherwise the URL is built
    from ``REDIS_SSL`` (selects ``rediss://`` vs ``redis://``),
    ``REDIS_USERNAME`` (default ``"default"``), ``REDIS_PASSWORD``,
    ``REDIS_HOST`` (default ``"localhost"``) and ``REDIS_PORT``
    (default ``6379``).

    Returns:
        The Redis connection URL.

    Raises:
        ValueError: If ``REDIS_PORT`` is set to a non-integer value.
    """
    explicit_url = os.getenv("REDIS_URL", None)
    if explicit_url:
        return explicit_url

    scheme = "rediss://" if get_boolean_env_var("REDIS_SSL", False) else "redis://"

    # if using RBAC: credentials are only embedded when a password is set
    secret = os.getenv("REDIS_PASSWORD", None)
    user = os.getenv("REDIS_USERNAME", "default")
    credentials = "" if secret is None else f"{user}:{secret}@"

    hostname = os.getenv("REDIS_HOST", "localhost")
    port_number = int(os.getenv("REDIS_PORT", 6379))
    return f"{scheme}{credentials}{hostname}:{port_number}"
def get_boolean_env_var(var_name, default_value=False):
    """Read an environment variable and interpret it as a boolean flag.

    Matching is case-insensitive. ``true``/``1``/``t``/``y``/``yes`` map to
    ``True`` and ``false``/``0``/``f``/``n``/``no`` map to ``False``.

    Args:
        var_name: Name of the environment variable to read.
        default_value: Returned unchanged when the variable is unset or holds
            an unrecognised value.

    Returns:
        ``True``, ``False``, or ``default_value``.
    """
    recognised = {
        **dict.fromkeys(("true", "1", "t", "y", "yes"), True),
        **dict.fromkeys(("false", "0", "f", "n", "no"), False),
    }
    raw = os.getenv(var_name, "").lower()
    return recognised.get(raw, default_value)
# Each name below is handed to get_multi_vector_retriever as one collection to search.
collection_names = ["collection1", "collection2", "collection3"] # Add your collection names here
retriever = get_multi_vector_retriever(docstore_id_key="doc_id", collection_names=collection_names)

This setup allows you to create a retriever that can handle multiple collections within the vector database by looping through all the collections and retrieving the relevant text [1][2][3].
Beta Was this translation helpful? Give feedback.
2 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
Checked other resources
Commit to Help
Example Code
Description
I am trying to create an application in which I have a vector database with multiple containers, and I want the LangChain retriever to get the relevant text after looping through all the containers.
As of now, the vectorstore variable is associated with a single container.
System Info
Beta Was this translation helpful? Give feedback.
All reactions