Skip to content

Commit

Permalink
Merge branch 'mainline' into joshua/hybrid-search
Browse files Browse the repository at this point in the history
  • Loading branch information
farshidz authored Jul 10, 2024
2 parents a658b3d + 5ee1ec3 commit 8db202c
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 244 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ pillow
numpy
pytest
dataclasses
pydantic>=2.0.0
pydantic>=2.0.0
requests_mock
9 changes: 8 additions & 1 deletion src/marqo/_httprequests.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,14 @@ def __to_json(
) -> Any:
if request.content == b'':
return request
return request.json()
try:
return request.json()
except requests.exceptions.JSONDecodeError as e:
# Handle non-JSON response here
raise MarqoWebError(message=request.text,
code="response_not_in_json_format",
error_type="response_not_in_json_format_type",
status_code=500) from e

@staticmethod
def _validate(
Expand Down
76 changes: 12 additions & 64 deletions tests/cloud_test_logic/cloud_test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class CloudTestIndex(str, Enum):
unstructured_text = "pymarqo_unstr_txt"
unstructured_image = "pymarqo_unstr_img"
unstructured_text_custom_prepro = "pymarqo_unstr_txt_cstm_pre"
unstructured_no_model = "pymarqo_unstr_no_model"

structured_image_prepro = "pymarqo_str_img_prepro"
structured_image_custom = "pymarqo_str_img_custom"
Expand All @@ -41,7 +42,6 @@ class CloudTestIndex(str, Enum):


index_name_to_settings_mappings = {
# TODO Due to the resources limit of the staging cluster, we only use 2 indexes for testing purpose now
CloudTestIndex.unstructured_text: {
"type": "unstructured",
"treatUrlsAndPointersAsImages": False,
Expand Down Expand Up @@ -79,69 +79,6 @@ class CloudTestIndex(str, Enum):
"patchMethod": "simple",
}
},
# CloudTestIndex.unstructured_text_custom_prepro: {
# "type": "unstructured",
# "treatUrlsAndPointersAsImages": False,
# "model": "test-model",
# "modelProperties": {
# "name": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
# "dimensions": 384,
# "tokens": 128,
# "type": "sbert"
# },
# "normalizeEmbeddings": True,
# "textPreprocessing": {
# "splitLength": 2,
# "splitOverlap": 1,
# "splitMethod": "sentence",
# },
#
# "storageClass": "marqo.balanced",
# "numberOfReplicas": 1,
# },
# Structured indexes
# CloudTestIndex.structured_image_prepro: {
# "type": "structured",
# "model": "open_clip/ViT-B-16/laion2b_s34b_b88k",
# "allFields": [
# {"name": "text_field_1", "type": "text", "features": ["lexical_search", "filter"]},
# {"name": "text_field_2", "type": "text", "features": ["filter"]},
# {"name": "image_field_1", "type": "image_pointer"},
# {"name": "array_field_1", "type": "array<text>", "features": ["filter"]},
# {"name": "float_field_1", "type": "float", "features": ["filter", "score_modifier"]},
# {"name": "int_field_1", "type": "int", "features": ["filter", "score_modifier"]},
# {"name": "bool_field_1", "type": "bool", "features": ["filter"]},
# ],
# "tensorFields": ["text_field_1", "image_field_1", "text_field_2"],
# "imagePreprocessing": {"patchMethod": "simple"},
#
# "inferenceType": "marqo.GPU",
# "storageClass": "marqo.balanced",
# },
# CloudTestIndex.structured_image_custom: {
# "type": "structured",
# "treatUrlsAndPointersAsImages": True,
# "model": "test-image-model",
# "modelProperties": {
# "name": "ViT-B-32-quickgelu",
# "dimensions": 512,
# "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt",
# "type": "open_clip",
# },
# "allFields": [
# {"name": "text_field_1", "type": "text", "features": ["lexical_search", "filter"]},
# {"name": "text_field_2", "type": "text", "features": ["filter"]},
# {"name": "image_field_1", "type": "image_pointer"},
# {"name": "array_field_1", "type": "array<text>", "features": ["filter"]},
# {"name": "float_field_1", "type": "float", "features": ["filter", "score_modifier"]},
# {"name": "int_field_1", "type": "int", "features": ["filter", "score_modifier"]},
# {"name": "bool_field_1", "type": "bool", "features": ["filter"]},
# ],
# "tensorFields": ["text_field_1", "image_field_1", "text_field_2"],
#
# "inferenceType": "marqo.CPU.large",
# "numberOfInferences": 2,
# },
CloudTestIndex.structured_text: {
"type": "structured",
"model": "hf/e5-base-v2",
Expand All @@ -155,4 +92,15 @@ class CloudTestIndex(str, Enum):
"storageClass": "marqo.balanced",
"numberOfShards": 2,
},
# Settings for the cloud test index that uses the "no_model" model.
CloudTestIndex.unstructured_no_model: {
    "type": "unstructured",
    "treatUrlsAndPointersAsImages": False,
    "inferenceType": "marqo.CPU.small",
    "storageClass": "marqo.basic",
    "model": "no_model",
    "modelProperties": {
        "type": "no_model",
        # Kept as an integer (not the string "512") so it agrees with the
        # no_model index settings used by the local tests in tests/marqo_test.py.
        "dimensions": 512
    },
}
}
22 changes: 11 additions & 11 deletions tests/marqo_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def setUpClass(cls) -> None:
cls.structured_index_name = "structured_index"
cls.structured_image_index_name = "structured_image_index"
cls.unstructured_image_index_name = "unstructured_image_index"
cls.unstructured_no_model_index_name = "unstructured_no_model_index"
cls.structured_image_index_name_simple_preprocessing_method = \
"structured_image_index_simple_preprocessing_method"
# TODO: include structured when boolean_field bug for structured is fixed
Expand Down Expand Up @@ -251,18 +252,17 @@ def setUpClass(cls) -> None:
],
"tensorFields": ["text_field_1", "text_field_2", "text_field_3", "image_field_1"],
"model": "ViT-B/32",
},
{
"indexName": cls.unstructured_no_model_index_name,
"type": "unstructured",
"model": "no_model",
"treatUrlsAndPointersAsImages": True,
"modelProperties": {
"type": "no_model",
"dimensions": 512
}
}
# {
# "indexName": cls.structured_image_index_name_simple_preprocessing_method,
# "type": "structured",
# "allFields": [{"name": "text_field_1", "type": "text"},
# {"name": "text_field_2", "type": "text"},
# {"name": "text_field_3", "type": "text"}],
# "tensorFields": ["text_field_1", "text_field_2", "text_field_3"],
# "model": "ViT-B/16",
# "imagePreprocessingMethod": None,
# "treatUrlsAndPointersAsImages": True,
# },
])
except Exception as e:
print("Error creating indexes: ", e)
Expand Down
15 changes: 12 additions & 3 deletions tests/v2_tests/test__httprequests.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import os
import unittest
from unittest.mock import patch, MagicMock
import pytest

import pytest
import requests.exceptions
import requests_mock

from marqo._httprequests import HttpRequests
from marqo.config import Config
from marqo.default_instance_mappings import DefaultInstanceMappings
from marqo.marqo_cloud_instance_mappings import MarqoCloudInstanceMappings
from marqo.errors import MarqoWebError
from marqo.marqo_cloud_instance_mappings import MarqoCloudInstanceMappings


@pytest.mark.fixed
Expand Down Expand Up @@ -123,4 +124,12 @@ def test_environment_variable_can_affect_construct_path(self):
for path in test_cases:
with self.subTest(f"base_url={custom_cloud_url}, path={path}"):
result=self.construct_path_helper(custom_cloud_url, path)
self.assertEqual(f"{custom_cloud_url}/api/v2/{path}", result)
self.assertEqual(f"{custom_cloud_url}/api/v2/{path}", result)

def test_http_request_raiseProperErrorIfResponseNotInJsonFormat(self):
    """Check that a non-JSON response body is reported as a MarqoWebError.

    A mocked endpoint returns plain text; handing that response to
    ``HttpRequests._validate`` must raise ``MarqoWebError`` whose ``code`` is
    ``response_not_in_json_format``.
    """
    endpoint = 'http://example.com/api/endpoint'
    with requests_mock.Mocker() as mocker:
        # Stub the endpoint so the GET below returns a body that is not JSON.
        mocker.get(endpoint, text='Not a JSON response')
        non_json_response = requests.get(endpoint)
        with self.assertRaises(MarqoWebError) as raised:
            HttpRequests._validate(non_json_response)
    self.assertEqual(raised.exception.code, "response_not_in_json_format")
Loading

0 comments on commit 8db202c

Please sign in to comment.