Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: AnalyticdbVector retrieval scores #8803

Merged
merged 2 commits into the base branch from the source branch
Sep 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 6 additions & 13 deletions api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,8 @@ def to_analyticdb_client_params(self):


class AnalyticdbVector(BaseVector):
_instance = None
_init = False

def __new__(cls, *args, **kwargs):
    """Return the process-wide singleton instance, creating it on first use.

    Constructor arguments are accepted (and ignored here) so that
    ``__init__`` can still receive them on every call.
    """
    instance = cls._instance
    if instance is None:
        instance = super().__new__(cls)
        cls._instance = instance
    return instance

def __init__(self, collection_name: str, config: AnalyticdbConfig):
# collection_name must be updated every time
self._collection_name = collection_name.lower()
if AnalyticdbVector._init:
return
try:
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_tea_openapi import models as open_api_models
Expand All @@ -62,7 +51,6 @@ def __init__(self, collection_name: str, config: AnalyticdbConfig):
self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params())
self._client = Client(self._client_config)
self._initialize()
AnalyticdbVector._init = True

def _initialize(self) -> None:
cache_key = f"vector_indexing_{self.config.instance_id}"
Expand Down Expand Up @@ -257,11 +245,14 @@ def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Doc
documents = []
for match in response.body.matches.match:
if match.score > score_threshold:
metadata = json.loads(match.metadata.get("metadata_"))
metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
metadata=json.loads(match.metadata.get("metadata_")),
metadata=metadata,
)
documents.append(doc)
documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents

def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
Expand All @@ -286,12 +277,14 @@ def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
for match in response.body.matches.match:
if match.score > score_threshold:
metadata = json.loads(match.metadata.get("metadata_"))
metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
vector=match.metadata.get("vector"),
metadata=metadata,
)
documents.append(doc)
documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents

def delete(self) -> None:
Expand Down