Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ async def asimilarity_search(
k: int = 4,
fetch_k: int = 50,
filter: Optional[List[dict]] = None,
fields: Optional[List[str]] = None,
*,
custom_query: Optional[
Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
Expand All @@ -413,11 +414,13 @@ async def asimilarity_search(
k=k,
num_candidates=fetch_k,
filter=filter,
fields=fields,
custom_query=custom_query,
)
docs = _hits_to_docs_scores(
hits=hits,
content_field=self.query_field,
fields=fields,
doc_builder=doc_builder,
)
return [doc for doc, _score in docs]
Expand Down Expand Up @@ -504,7 +507,8 @@ async def asimilarity_search_with_score(
query: str,
k: int = 4,
filter: Optional[List[dict]] = None,
*,
fields: Optional[List[str]] = None,
*,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's still a random indent here.

custom_query: Optional[
Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
] = None,
Expand All @@ -528,11 +532,16 @@ async def asimilarity_search_with_score(
raise ValueError("scores are currently not supported in hybrid mode")

hits = await self._store.search(
query=query, k=k, filter=filter, custom_query=custom_query
query=query,
k=k,
filter=filter,
fields=fields,
custom_query=custom_query
)
return _hits_to_docs_scores(
hits=hits,
content_field=self.query_field,
fields=fields,
doc_builder=doc_builder,
)

Expand All @@ -541,7 +550,8 @@ async def asimilarity_search_by_vector_with_relevance_scores(
embedding: List[float],
k: int = 4,
filter: Optional[List[Dict]] = None,
*,
fields: Optional[List[str]] = None,
*,
custom_query: Optional[
Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
] = None,
Expand Down Expand Up @@ -569,11 +579,13 @@ async def asimilarity_search_by_vector_with_relevance_scores(
query_vector=embedding,
k=k,
filter=filter,
fields=fields,
custom_query=custom_query,
)
return _hits_to_docs_scores(
hits=hits,
content_field=self.query_field,
fields=fields,
doc_builder=doc_builder,
)

Expand Down
14 changes: 13 additions & 1 deletion libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ def similarity_search(
k: int = 4,
fetch_k: int = 50,
filter: Optional[List[dict]] = None,
fields: Optional[List[str]] = None,
*,
custom_query: Optional[
Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
Expand All @@ -413,11 +414,13 @@ def similarity_search(
k=k,
num_candidates=fetch_k,
filter=filter,
fields=fields,
custom_query=custom_query,
)
docs = _hits_to_docs_scores(
hits=hits,
content_field=self.query_field,
fields=fields,
doc_builder=doc_builder,
)
return [doc for doc, _score in docs]
Expand Down Expand Up @@ -504,6 +507,7 @@ def similarity_search_with_score(
query: str,
k: int = 4,
filter: Optional[List[dict]] = None,
fields: Optional[List[str]] = None,
*,
custom_query: Optional[
Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
Expand All @@ -528,11 +532,16 @@ def similarity_search_with_score(
raise ValueError("scores are currently not supported in hybrid mode")

hits = self._store.search(
query=query, k=k, filter=filter, custom_query=custom_query
query=query,
k=k,
filter=filter,
fields=fields,
custom_query=custom_query
)
return _hits_to_docs_scores(
hits=hits,
content_field=self.query_field,
fields=fields,
doc_builder=doc_builder,
)

Expand All @@ -541,6 +550,7 @@ def similarity_search_by_vector_with_relevance_scores(
embedding: List[float],
k: int = 4,
filter: Optional[List[Dict]] = None,
fields: Optional[List[str]] = None,
*,
custom_query: Optional[
Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
Expand Down Expand Up @@ -569,11 +579,13 @@ def similarity_search_by_vector_with_relevance_scores(
query_vector=embedding,
k=k,
filter=filter,
fields=fields,
custom_query=custom_query,
)
return _hits_to_docs_scores(
hits=hits,
content_field=self.query_field,
fields=fields,
doc_builder=doc_builder,
)

Expand Down
9 changes: 6 additions & 3 deletions libs/elasticsearch/langchain_elasticsearch/_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,14 @@ def _hits_to_docs_scores(

documents = []

def default_doc_builder(hit: Dict) -> Document:
return Document(
def default_doc_builder(hit: Dict, fields: List[str]) -> Document:
doc = Document(
page_content=hit["_source"].get(content_field, ""),
metadata=hit["_source"].get("metadata", {}),
)
for field_key in fields:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 84-88 (now 87-91) already do exactly that, so wouldn't lines 80-88 get removed then? There may be other cases in which people create more sophisticated document builders, and the fields should be added anyway, as they are in those lines.

In my current code, I made only the changes from above, and everything works as expected without any changes to this file.

Maybe the maintainers can have a look at this.

doc.metadata[field_key] = hit["_source"].get(field_key, None)
return doc

doc_builder = doc_builder or default_doc_builder

Expand All @@ -87,7 +90,7 @@ def default_doc_builder(hit: Dict) -> Document:
]:
hit["_source"]["metadata"][field] = hit["_source"][field]

doc = doc_builder(hit)
doc = doc_builder(hit, fields)
documents.append((doc, hit["_score"]))

return documents
Expand Down
Loading