Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,8 @@ def __init__(
] = ApproxRetrievalStrategy(),
es_params: Optional[Dict[str, Any]] = None,
custom_index_settings: Optional[Dict[str, Any]] = None,
num_dimensions: Optional[int] = None,
metadata_mappings: Optional[Dict[str, Any]] = None,
):
if isinstance(strategy, BaseRetrievalStrategy):
strategy = _convert_retrieval_strategy(
Expand All @@ -345,8 +347,10 @@ def __init__(
index=index_name,
retrieval_strategy=strategy,
embedding_service=embedding_service,
num_dimensions=num_dimensions,
text_field=query_field,
vector_field=vector_query_field,
metadata_mappings=metadata_mappings,
user_agent=user_agent("langchain-py-vs"),
custom_index_settings=custom_index_settings,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,8 @@ def __init__(
] = ApproxRetrievalStrategy(),
es_params: Optional[Dict[str, Any]] = None,
custom_index_settings: Optional[Dict[str, Any]] = None,
num_dimensions: Optional[int] = None,
metadata_mappings: Optional[Dict[str, Any]] = None,
):
if isinstance(strategy, BaseRetrievalStrategy):
strategy = _convert_retrieval_strategy(
Expand All @@ -345,8 +347,10 @@ def __init__(
index=index_name,
retrieval_strategy=strategy,
embedding_service=embedding_service,
num_dimensions=num_dimensions,
text_field=query_field,
vector_field=vector_query_field,
metadata_mappings=metadata_mappings,
user_agent=user_agent("langchain-py-vs"),
custom_index_settings=custom_index_settings,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1051,3 +1051,85 @@ async def test_elasticsearch_delete_ids(
await docsearch.adelete([ids[3]])
output = await docsearch.asimilarity_search("gni", k=10)
assert len(output) == 0

@pytest.mark.asyncio
async def test_num_dimensions_mismatch_and_match(
    self, es_params: dict, index_name: str
) -> None:
    """Test ``num_dimensions`` with both a mismatching and a matching value.

    A value that disagrees with the embedding size must make ``afrom_texts``
    raise, while a matching value must yield a store that can be searched.
    Each case uses its own index so the two cases cannot interfere.
    """
    texts = ["foo", "bar"]

    # Test 1: Mismatch should fail when trying to add documents.
    # NOTE(review): ``Exception`` is deliberately broad because the concrete
    # error type is not visible from this test; narrow it once confirmed.
    with pytest.raises(Exception):
        await AsyncElasticsearchStore.afrom_texts(
            texts,
            AsyncConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
            num_dimensions=5,  # Mismatch: 5 vs 16
            **es_params,
            index_name=f"{index_name}_mismatch",  # Use separate index
        )

    # Test 2: Match should work
    docsearch = await AsyncElasticsearchStore.afrom_texts(
        texts,
        AsyncConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
        num_dimensions=16,  # Match: 16 vs 16
        **es_params,
        index_name=f"{index_name}_match",  # Use separate index
    )
    try:
        # Verify it works by doing a search
        results = await docsearch.asimilarity_search("foo", k=1)
        assert results == [Document(page_content="foo")]
    finally:
        # Close the store even when the assertion above fails.
        await docsearch.aclose()

@pytest.mark.asyncio
async def test_metadata_mappings_integration(
    self, es_params: dict, index_name: str
) -> None:
    """Test that metadata_mappings parameter works correctly.

    This test verifies that custom metadata field mappings are properly applied to
    Elasticsearch index, allowing for proper indexing and searching of metadata.
    """
    metadata_mappings = {
        "category": {"type": "keyword"},
        "score": {"type": "float"},
        "tags": {"type": "text"},
    }

    texts = ["Document about cats", "Document about dogs", "Document about birds"]
    metadatas = [
        {"category": "animals", "score": 0.9, "tags": "some tag about cats"},
        {"category": "animals", "score": 0.8, "tags": "some tag about dogs"},
        {"category": "animals", "score": 0.7, "tags": "some tag about birds"},
    ]

    docsearch = await AsyncElasticsearchStore.afrom_texts(
        texts,
        AsyncConsistentFakeEmbeddings(),
        metadatas=metadatas,
        metadata_mappings=metadata_mappings,
        num_dimensions=16,
        **es_params,
        index_name=index_name,
    )
    try:
        # Read the mapping back from the index to see what was really applied.
        mapping_response = await docsearch.client.indices.get_mapping(
            index=index_name
        )
        mapping_properties = mapping_response[index_name]["mappings"]["properties"]

        assert "metadata" in mapping_properties
        metadata_props = mapping_properties["metadata"]["properties"]

        # Every mapping that was requested must come back unchanged; checking
        # against the dict that was sent keeps input and expectation in sync.
        for field_name, expected_mapping in metadata_mappings.items():
            assert metadata_props[field_name] == expected_mapping

        # A term filter on the keyword-mapped field must match all documents.
        results = await docsearch.asimilarity_search(
            "pets", k=3, filter=[{"term": {"metadata.category": "animals"}}]
        )

        assert len(results) == 3
    finally:
        # Close the store even when one of the assertions above fails.
        await docsearch.aclose()
Original file line number Diff line number Diff line change
Expand Up @@ -1029,3 +1029,85 @@ def test_elasticsearch_delete_ids(self, es_params: dict, index_name: str) -> Non
docsearch.delete([ids[3]])
output = docsearch.similarity_search("gni", k=10)
assert len(output) == 0

@pytest.mark.sync
def test_num_dimensions_mismatch_and_match(
    self, es_params: dict, index_name: str
) -> None:
    """Test ``num_dimensions`` with both a mismatching and a matching value.

    A value that disagrees with the embedding size must make ``from_texts``
    raise, while a matching value must yield a store that can be searched.
    Each case uses its own index so the two cases cannot interfere.
    """
    texts = ["foo", "bar"]

    # Test 1: Mismatch should fail when trying to add documents.
    # NOTE(review): ``Exception`` is deliberately broad because the concrete
    # error type is not visible from this test; narrow it once confirmed.
    with pytest.raises(Exception):
        ElasticsearchStore.from_texts(
            texts,
            ConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
            num_dimensions=5,  # Mismatch: 5 vs 16
            **es_params,
            index_name=f"{index_name}_mismatch",  # Use separate index
        )

    # Test 2: Match should work
    docsearch = ElasticsearchStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
        num_dimensions=16,  # Match: 16 vs 16
        **es_params,
        index_name=f"{index_name}_match",  # Use separate index
    )
    try:
        # Verify it works by doing a search
        results = docsearch.similarity_search("foo", k=1)
        assert results == [Document(page_content="foo")]
    finally:
        # Close the store even when the assertion above fails.
        docsearch.close()

@pytest.mark.sync
def test_metadata_mappings_integration(
    self, es_params: dict, index_name: str
) -> None:
    """Test that metadata_mappings parameter works correctly.

    This test verifies that custom metadata field mappings are properly applied to
    Elasticsearch index, allowing for proper indexing and searching of metadata.
    """
    metadata_mappings = {
        "category": {"type": "keyword"},
        "score": {"type": "float"},
        "tags": {"type": "text"},
    }

    texts = ["Document about cats", "Document about dogs", "Document about birds"]
    metadatas = [
        {"category": "animals", "score": 0.9, "tags": "some tag about cats"},
        {"category": "animals", "score": 0.8, "tags": "some tag about dogs"},
        {"category": "animals", "score": 0.7, "tags": "some tag about birds"},
    ]

    docsearch = ElasticsearchStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        metadata_mappings=metadata_mappings,
        num_dimensions=16,
        **es_params,
        index_name=index_name,
    )
    try:
        # Read the mapping back from the index to see what was really applied.
        mapping_response = docsearch.client.indices.get_mapping(index=index_name)
        mapping_properties = mapping_response[index_name]["mappings"]["properties"]

        assert "metadata" in mapping_properties
        metadata_props = mapping_properties["metadata"]["properties"]

        # Every mapping that was requested must come back unchanged; checking
        # against the dict that was sent keeps input and expectation in sync.
        for field_name, expected_mapping in metadata_mappings.items():
            assert metadata_props[field_name] == expected_mapping

        # A term filter on the keyword-mapped field must match all documents.
        results = docsearch.similarity_search(
            "pets", k=3, filter=[{"term": {"metadata.category": "animals"}}]
        )

        assert len(results) == 3
    finally:
        # Close the store even when one of the assertions above fails.
        docsearch.close()
88 changes: 87 additions & 1 deletion libs/elasticsearch/tests/unit_tests/_async/test_vectorstores.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""Test Elasticsearch functionality."""

import inspect
import re
from typing import Any, AsyncGenerator, Dict, List, Optional
from unittest.mock import AsyncMock
from unittest.mock import AsyncMock, patch

import pytest
from elasticsearch import AsyncElasticsearch
from elasticsearch.helpers.vectorstore import VectorStore as EVectorStore
from langchain_core.documents import Document

from langchain_elasticsearch._async.vectorstores import (
Expand Down Expand Up @@ -417,3 +419,87 @@ async def test_elasticsearch_hybrid_scores_guard(
await hybrid_store.asimilarity_search_by_vector_with_relevance_scores(
[1, 2, 3]
)

@pytest.mark.asyncio
async def test_parameter_forwarding_to_evectorstore(self) -> None:
    """Test to catch EVectorStore parameters that are not forwarded.

    This test compares the ``EVectorStore.__init__`` signature against the
    keyword arguments AsyncElasticsearchStore actually passes when it builds
    its underlying store. If EVectorStore adds new parameters, this test will
    fail and alert us to update AsyncElasticsearchStore.
    """

    client = AsyncElasticsearch(hosts=["http://dummy:9200"])

    # Get EVectorStore constructor signature
    evectorstore_sig = inspect.signature(EVectorStore.__init__)
    # Remove self from the parameters set
    evectorstore_params = set(evectorstore_sig.parameters.keys()) - {"self"}

    with patch(
        "langchain_elasticsearch._async.vectorstores.EVectorStore"
    ) as mock_evectorstore:
        # Mock the close method to be async
        mock_evectorstore.return_value.close = AsyncMock()

        store = AsyncElasticsearchStore(
            index_name="test_index",
            es_connection=client,
            num_dimensions=1536,
        )
        try:
            # Get what parameters were actually passed to EVectorStore
            mock_evectorstore.assert_called_once()
            call_args = mock_evectorstore.call_args
            forwarded_params = set(call_args.kwargs.keys())

            # Check for missing parameters
            missing_params = evectorstore_params - forwarded_params
            if missing_params:
                # Sorted so the failure message is stable between runs.
                pytest.fail(
                    "AsyncElasticsearchStore is missing these EVectorStore "
                    f"parameters: {sorted(missing_params)}. Please add them to "
                    "AsyncElasticsearchStore and forward them to EVectorStore."
                )

            # Check for unexpected parameters
            unexpected_params = forwarded_params - evectorstore_params
            if unexpected_params:
                pytest.fail(
                    "AsyncElasticsearchStore is forwarding unexpected parameters "
                    f"to EVectorStore: {sorted(unexpected_params)}. These "
                    "parameters don't exist in EVectorStore.__init__."
                )
        finally:
            # Close the store even when one of the checks above fails.
            await store.aclose()

@pytest.mark.asyncio
async def test_parameter_forwarding_defaults(self) -> None:
    """Check the keyword arguments EVectorStore receives for a minimal store.

    The store is built with only ``index_name`` and ``es_connection``; the
    patched EVectorStore must then be called once with the index name, the
    same client, the default field names and ``num_dimensions=None``.
    """
    es_client = AsyncElasticsearch(hosts=["http://dummy:9200"])
    patch_target = "langchain_elasticsearch._async.vectorstores.EVectorStore"

    with patch(patch_target) as evectorstore_cls:
        # ``aclose`` awaits the underlying store's ``close``, so it must be async.
        evectorstore_cls.return_value.close = AsyncMock()

        # Only the required arguments are given; everything else is defaulted.
        store = AsyncElasticsearchStore(
            index_name="test_index", es_connection=es_client
        )

        evectorstore_cls.assert_called_once()
        forwarded = evectorstore_cls.call_args.kwargs

        # Values compared by equality, in the order they are checked.
        expected_values = {
            "index": "test_index",
            "client": es_client,
            "vector_field": "vector",  # default
            "text_field": "text",  # default
        }
        for param_name, expected in expected_values.items():
            assert forwarded[param_name] == expected

        # ``None`` is compared by identity rather than equality.
        assert forwarded["num_dimensions"] is None  # default

        await store.aclose()
Loading