Skip to content

Commit

Permalink
Merge branch 'mainline' into joshua/hybrid-search
Browse files Browse the repository at this point in the history
  • Loading branch information
vicilliar committed Jun 26, 2024
2 parents 39e2ce2 + e5780d4 commit 55b5e03
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 49 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"tox"
],
name="marqo",
version="3.5.0",
version="3.5.1",
author="marqo org",
author_email="org@marqo.io",
description="Tensor search for humans",
Expand Down
5 changes: 4 additions & 1 deletion src/marqo/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,10 @@ def delete_index(self, index_name: str, wait_for_readiness=True) -> Dict[str, An
cloud_wait_for_index_status(self.http, index_name, enums.IndexStatus.DELETED)
return res
except errors.MarqoWebError as e:
return e.message
if "index_not_found" in str(e):
return e.message
else:
raise e

def get_index(self, index_name: str) -> Index:
"""Get the index.
Expand Down
4 changes: 2 additions & 2 deletions src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__marqo_version__ = "2.6.0"
__marqo_version__ = "2.8.0"
__marqo_release_page__ = f"https://github.com/marqo-ai/marqo/releases/tag/{__marqo_version__}"

__minimum_supported_marqo_version__ = "2.0"
__minimum_supported_marqo_version__ = "2.6.0"


def supported_marqo_version() -> str:
Expand Down
31 changes: 13 additions & 18 deletions tests/cloud_test_logic/cloud_test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ class CloudTestIndex(str, Enum):

index_name_to_settings_mappings = {
# TODO Due to the resource limits of the staging cluster, we only use 2 indexes for testing purposes for now
# CloudTestIndex.unstructured_text: {
# "type": "unstructured",
# "treatUrlsAndPointersAsImages": False,
# "model": "hf/e5-base-v2",
#
# "inferenceType": "marqo.CPU.small",
# "storageClass": "marqo.basic",
# },
CloudTestIndex.unstructured_text: {
"type": "unstructured",
"treatUrlsAndPointersAsImages": False,
"model": "hf/e5-base-v2",

"inferenceType": "marqo.CPU.small",
"storageClass": "marqo.basic",
},
CloudTestIndex.unstructured_image: {
"type": "unstructured",
"treatUrlsAndPointersAsImages": True,
Expand All @@ -61,7 +61,7 @@ class CloudTestIndex(str, Enum):
CloudTestIndex.structured_image: {
"type": "structured",
"model": "open_clip/ViT-B-32/laion2b_s34b_b79k",
"infereceType": "marqo.CPU.small",
"inferenceType": "marqo.CPU.small",
"storageClass": "marqo.basic",
"allFields": [
{"name": "text_field_1", "type": "text", "features": ["lexical_search", "filter"]},
Expand Down Expand Up @@ -144,19 +144,14 @@ class CloudTestIndex(str, Enum):
# },
CloudTestIndex.structured_text: {
"type": "structured",
"treatUrlsAndPointersAsImages": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"model": "hf/e5-base-v2",
"allFields": [
{"name": "text_field_1", "type": "text", "features": ["lexical_search", "filter"]},
{"name": "text_field_2", "type": "text", "features": ["filter"]},
{"name": "text_field_2", "type": "text", "features": ["lexical_search", "filter"]},
{"name": "text_field_3", "type": "text", "features": ["lexical_search"]},
{"name": "array_field_1", "type": "array<text>", "features": ["filter"]},
{"name": "float_field_1", "type": "float", "features": ["filter", "score_modifier"]},
{"name": "int_field_1", "type": "int", "features": ["filter", "score_modifier"]},
{"name": "bool_field_1", "type": "bool", "features": ["filter"]},
],
{"name": "int_field_1", "type": "int", "features": ["score_modifier"]},
{"name": "int_filter_field_1", "type": "int", "features": ["filter", "score_modifier"]}],
"tensorFields": ["text_field_1", "text_field_2", "text_field_3"],

"storageClass": "marqo.balanced",
"numberOfShards": 2,
},
Expand Down
56 changes: 31 additions & 25 deletions tests/v2_tests/test_embed.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
import copy
import marqo
from marqo import enums
from unittest import mock
import requests
import random
import math
import time
from tests.marqo_test import MarqoTestCase, CloudTestIndex
from marqo.errors import MarqoWebError
from pytest import mark
import numpy as np
from pytest import mark

from marqo.errors import MarqoWebError
from tests.marqo_test import MarqoTestCase, CloudTestIndex


@mark.fixed
class TestEmbed(MarqoTestCase):

def setUp(self):
self.test_cases = [(CloudTestIndex.structured_text, self.unstructured_index_name)]

def test_embed_single_string(self):
"""Embeds a string. Use add docs and get docs with tensor facets to ensure the vector is correct.
Checks the basic functionality and response structure. Also checks that the request level prefix override works."""
Expand All @@ -29,21 +26,22 @@ def test_embed_single_string(self):
"_id": "doc1",
"text_field_1": "Jimmy Butler is the GOAT."
}

res_1 = self.client.index(test_index_name).add_documents([d1], tensor_fields=tensor_fields)

# Get doc with tensor facets (for reference vector)
retrieved_d1 = self.client.index(test_index_name).get_document(
document_id="doc1", expose_facets=True)

# Call embed
embed_res_1 = self.client.index(test_index_name).embed("Jimmy Butler is the GOAT.", content_type="document")
embed_res_1 = self.client.index(test_index_name).embed("Jimmy Butler is the GOAT.",
content_type="document")

# Assert that the embed response has the expected structure and that its embedding matches the reference vector
self.assertIn("processingTimeMs", embed_res_1)
self.assertEqual(embed_res_1["content"], "Jimmy Butler is the GOAT.")
self.assertTrue(np.allclose(embed_res_1["embeddings"][0], retrieved_d1["_tensor_facets"][0]["_embedding"]))

self.assertTrue(np.allclose(embed_res_1["embeddings"][0], retrieved_d1["_tensor_facets"][0]["_embedding"],
atol=1e-4))

def test_request_level_prefix_override_embed_add_docs(self):
"""Checks that the request level prefix override works."""
Expand All @@ -59,18 +57,21 @@ def test_request_level_prefix_override_embed_add_docs(self):
"_id": "doc1",
"text_field_1": "Jimmy Butler is the GOAT."
}
res = self.client.index(test_index_name).add_documents([d1], tensor_fields=tensor_fields, text_chunk_prefix="test query: ")
res = self.client.index(test_index_name).add_documents([d1], tensor_fields=tensor_fields,
text_chunk_prefix="test query: ")

# Get doc with tensor facets (for reference vector)
retrieved_d1 = self.client.index(test_index_name).get_document(
document_id="doc1", expose_facets=True)

embed_res = self.client.index(test_index_name).embed("test query: Jimmy Butler is the GOAT.", content_type=None)
embed_res = self.client.index(test_index_name).embed("test query: Jimmy Butler is the GOAT.",
content_type=None)

# Assert request level prefix override
self.assertIn("processingTimeMs", embed_res)
self.assertEqual(embed_res["content"], "test query: Jimmy Butler is the GOAT.")
self.assertTrue(np.allclose(embed_res["embeddings"][0], retrieved_d1["_tensor_facets"][0]["_embedding"]))
self.assertTrue(np.allclose(embed_res["embeddings"][0], retrieved_d1["_tensor_facets"][0]["_embedding"],
atol=1e-4))


def test_embed_with_device(self):
Expand All @@ -95,10 +96,12 @@ def test_embed_with_device(self):
document_id="doc1", expose_facets=True)

# Call embed
embed_res = self.client.index(test_index_name).embed(content="Jimmy Butler is the GOAT.", device="cpu", content_type="document")
embed_res = self.client.index(test_index_name).embed(content="Jimmy Butler is the GOAT.", device="cpu",
content_type="document")
self.assertIn("processingTimeMs", embed_res)
self.assertEqual(embed_res["content"], "Jimmy Butler is the GOAT.")
self.assertTrue(np.allclose(embed_res["embeddings"][0], retrieved_d1["_tensor_facets"][0] ["_embedding"]))
self.assertTrue(np.allclose(embed_res["embeddings"][0], retrieved_d1["_tensor_facets"][0] ["_embedding"],
atol=1e-4))

def test_embed_single_dict(self):
"""Embeds a dict. Use add docs and get docs with tensor facets to ensure the vector is correct.
Expand All @@ -122,11 +125,14 @@ def test_embed_single_dict(self):
document_id="doc1", expose_facets=True)

# Call embed
embed_res = self.client.index(test_index_name).embed(content={"Jimmy Butler is the GOAT.": 1}, content_type="document")
embed_res = self.client.index(test_index_name).embed(content={"Jimmy Butler is the GOAT.": 1},
content_type="document")

self.assertIn("processingTimeMs", embed_res)
self.assertEqual(embed_res["content"], {"Jimmy Butler is the GOAT.": 1})
self.assertTrue(np.allclose(embed_res["embeddings"][0], retrieved_d1["_tensor_facets"][0] ["_embedding"]))
self.assertTrue(np.allclose(embed_res["embeddings"][0], retrieved_d1["_tensor_facets"][0]["_embedding"],
atol=1e-4))


def test_embed_list_content(self):
"""Embeds a list with string and dict. Use add docs and get docs with tensor facets to ensure the vector is correct.
Expand Down Expand Up @@ -161,9 +167,9 @@ def test_embed_list_content(self):
self.assertIn("processingTimeMs", embed_res)
self.assertEqual(embed_res["content"], [{"Jimmy Butler is the GOAT.": 1}, "Alex Caruso is the GOAT."])
self.assertTrue(
np.allclose(embed_res["embeddings"][0], retrieved_docs["results"][0]["_tensor_facets"][0]["_embedding"], atol=1e-6))
np.allclose(embed_res["embeddings"][0], retrieved_docs["results"][0]["_tensor_facets"][0]["_embedding"], atol=1e-4))
self.assertTrue(
np.allclose(embed_res["embeddings"][1], retrieved_docs["results"][1]["_tensor_facets"][0]["_embedding"], atol=1e-6))
np.allclose(embed_res["embeddings"][1], retrieved_docs["results"][1]["_tensor_facets"][0]["_embedding"], atol=1e-4))


def test_embed_non_numeric_weight_fails(self):
Expand All @@ -176,4 +182,4 @@ def test_embed_non_numeric_weight_fails(self):
with self.assertRaises(MarqoWebError) as e:
self.client.index(test_index_name).embed(content={"text to embed": "not a number"})

self.assertIn("not a valid float", str(e.exception))
self.assertIn("not a valid float", str(e.exception))
5 changes: 3 additions & 2 deletions tests/v2_tests/test_recommend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from marqo.enums import InterpolationMethod
from tests.marqo_test import MarqoTestCase

from tests.cloud_test_logic.cloud_test_index import CloudTestIndex

@mark.fixed
class TestRecommend(MarqoTestCase):
Expand Down Expand Up @@ -52,8 +52,9 @@ def test_recommend_allFields(self):
"""
Test recommend with all fields provided
"""

self.test_cases = [(CloudTestIndex.structured_text, self.structured_index_name), ]
for cloud_test_index_to_use, open_source_test_index_name in self.test_cases:
open_source_test_index_name = self.structured_index_name
test_index_name = self.get_test_index_name(
cloud_test_index_to_use=cloud_test_index_to_use,
open_source_test_index_name=open_source_test_index_name
Expand Down

0 comments on commit 55b5e03

Please sign in to comment.