From ca8bbb78240620fefa7d78cddf77f99cc9967685 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 16 Oct 2024 11:31:50 +0800 Subject: [PATCH 1/3] add the default score value for economical knowledge retrieval --- api/core/rag/retrieval/dataset_retrieval.py | 2 +- .../nodes/knowledge_retrieval/knowledge_retrieval_node.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 633e41d5cf1ed6..e089bdc85fd0e8 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -231,7 +231,7 @@ def retrieve( source["content"] = segment.content retrieval_resource_list.append(source) if hit_callback and retrieval_resource_list: - retrieval_resource_list = sorted(retrieval_resource_list, key=lambda x: x.get("score"), reverse=True) + retrieval_resource_list = sorted(retrieval_resource_list, key=lambda x: x.get("score", 0.0), reverse=True) for position, item in enumerate(retrieval_resource_list, start=1): item["position"] = position hit_callback.return_retriever_resource_info(retrieval_resource_list) diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 0b3e9bd6a888ec..87a75d376f25b4 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -234,7 +234,7 @@ def _fetch_dataset_retriever(self, node_data: KnowledgeRetrievalNodeData, query: retrieval_resource_list.append(source) if retrieval_resource_list: retrieval_resource_list = sorted( - retrieval_resource_list, key=lambda x: x.get("metadata").get("score"), reverse=True + retrieval_resource_list, key=lambda x: x.get("metadata").get("score", 0.0), reverse=True ) position = 1 for item in retrieval_resource_list: From 29e6efdc00b208e7de86c0dc11c3b2997c9b5f68 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 16 Oct 2024 11:52:27 +0800 Subject: [PATCH 2/3] add the default score value for economical knowledge retrieval --- api/core/rag/retrieval/dataset_retrieval.py | 6 +++--- .../nodes/knowledge_retrieval/knowledge_retrieval_node.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index e089bdc85fd0e8..816bf13f719398 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -217,7 +217,7 @@ def retrieve( "data_source_type": document.data_source_type, "segment_id": segment.id, "retriever_from": invoke_from.to_source(), - "score": document_score_list.get(segment.index_node_id, None), + "score": document_score_list.get(segment.index_node_id, .0), } if invoke_from.to_source() == "dev": @@ -231,12 +231,12 @@ def retrieve( source["content"] = segment.content retrieval_resource_list.append(source) if hit_callback and retrieval_resource_list: - retrieval_resource_list = sorted(retrieval_resource_list, key=lambda x: x.get("score", 0.0), reverse=True) + retrieval_resource_list = sorted(retrieval_resource_list, key=lambda x: x.get("score") or 0.0, reverse=True) for position, item in enumerate(retrieval_resource_list, start=1): item["position"] = position hit_callback.return_retriever_resource_info(retrieval_resource_list) if document_context_list: - document_context_list = sorted(document_context_list, key=lambda x: x.score, reverse=True) + document_context_list = sorted(document_context_list, key=lambda x: x.score or 0.0, reverse=True) return str("\n".join([document_context.content for document_context in document_context_list])) return "" diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 87a75d376f25b4..8cd208d7fc8d10 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -234,7 +234,7 @@ def _fetch_dataset_retriever(self, node_data: KnowledgeRetrievalNodeData, query: retrieval_resource_list.append(source) if retrieval_resource_list: retrieval_resource_list = sorted( - retrieval_resource_list, key=lambda x: x.get("metadata").get("score", 0.0), reverse=True + retrieval_resource_list, key=lambda x: x.get("metadata").get("score") or 0.0, reverse=True ) position = 1 for item in retrieval_resource_list: From c54b37f685a279a21d07ea1a8319ef403ab12a46 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 16 Oct 2024 13:52:03 +0800 Subject: [PATCH 3/3] add the default score value for economical knowledge retrieval --- api/core/rag/retrieval/dataset_retrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 816bf13f719398..3455cdc3c40905 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -217,7 +217,7 @@ def retrieve( "data_source_type": document.data_source_type, "segment_id": segment.id, "retriever_from": invoke_from.to_source(), - "score": document_score_list.get(segment.index_node_id, .0), + "score": document_score_list.get(segment.index_node_id, 0.0), } if invoke_from.to_source() == "dev":