From 0f08b0f0535a9bf9c05dfbd031cb7f8a6493ca81 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Fri, 29 Nov 2024 16:39:55 +0800
Subject: [PATCH] Weight up title and keywords for chunks in terms of retrieval (#3750)

### What problem does this PR solve?

When building each chunk's token list for reranking, title tokens are now
repeated 2x and important keywords 5x, so they carry more weight than plain
content tokens.

### Type of change

- [x] Performance Improvement
---
 rag/nlp/search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index a24f7a71d87..5abacc5f93b 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -250,7 +250,7 @@ def rerank(self, sres, query, tkweight=0.3,
             content_ltks = sres.field[i][cfield].split()
             title_tks = [t for t in sres.field[i].get("title_tks", "").split() if t]
             important_kwd = sres.field[i].get("important_kwd", [])
-            tks = content_ltks + title_tks + important_kwd
+            tks = content_ltks + title_tks*2 + important_kwd*5
             ins_tw.append(tks)
 
         sim, tksim, vtsim = self.qryr.hybrid_similarity(sres.query_vector,