diff --git a/eva/catalog/catalog_manager.md b/eva/catalog/catalog_manager.md index 70a4531be1..a8ce1ad9eb 100644 --- a/eva/catalog/catalog_manager.md +++ b/eva/catalog/catalog_manager.md @@ -1,7 +1,6 @@ # Catalog Manager -Explanation for developers on how to use the eva catalog_manager. -CatalogManager class that provides a set of services to interact with a database that stores metadata about tables, columns, and user-defined functions (UDFs). Information like what is the data type in a certain column in a table, type of a table, its name, etc.. It contains functions to get, insert and delete catalog entries for Tables, UDFs, UDF IOs, Columns and Indexes. +The CatalogManager class provides a set of services to interact with a database that stores metadata about tables, columns, and user-defined functions (UDFs). This metadata includes information such as the data type of a column in a table, the type of a table, its name, etc. It contains functions to get, insert and delete catalog entries for Tables, UDFs, UDF IOs, Columns and Indexes. This data is stored in the eva_catalog.db file which can be found in ~/.eva// folder. @@ -9,18 +8,20 @@ Catalog manager currently has 7 services in it: ``` TableCatalogService() ColumnCatalogService() +IndexCatalogService() UdfCatalogService() UdfIOCatalogService() -IndexCatalogService() UdfCostCatalogService() UdfMetadataCatalogService() ``` ## Catalog Services This class provides functionality related to a table catalog, including inserting, getting, deleting, and renaming table entries, as well as retrieving all entries. e.g. the TableCatalogService contains code to get, insert and delete a table. + ## Catalog Models These contain the data model that is used by the catalog services. Each model represents a table in the underlying database. 
def get_all_entries(self) -> List:
    """Return every row of this service's catalog model as dataclass entries.

    Returns:
        List: the result of calling ``as_dataclass()`` on each row returned
        by ``self.model.query.all()``, or an empty list when
        ``NoResultFound`` is raised while building that list.
    """
    # NOTE(review): SQLAlchemy's Query.all() presumably returns an empty
    # list rather than raising NoResultFound; the handler is kept because
    # callers/tests may rely on it -- confirm before removing.
    try:
        return [row.as_dataclass() for row in self.model.query.all()]
    except NoResultFound:
        return []
# See the License for the specific language governing permissions and # limitations under the License. -from typing import List - -from sqlalchemy.orm.exc import NoResultFound from eva.catalog.catalog_type import TableType from eva.catalog.models.table_catalog import TableCatalog, TableCatalogEntry @@ -123,10 +120,3 @@ def rename_entry(self, table: TableCatalogEntry, new_name: str): ) logger.error(err_msg) raise RuntimeError(err_msg) - - def get_all_entries(self) -> List[TableCatalogEntry]: - try: - entries = self.model.query.all() - return [entry.as_dataclass() for entry in entries] - except NoResultFound: - return [] diff --git a/eva/catalog/services/udf_catalog_service.py b/eva/catalog/services/udf_catalog_service.py index 184be9914a..8f7373c1e3 100644 --- a/eva/catalog/services/udf_catalog_service.py +++ b/eva/catalog/services/udf_catalog_service.py @@ -12,8 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List - from sqlalchemy.orm.exc import NoResultFound from eva.catalog.models.udf_catalog import UdfCatalog, UdfCatalogEntry @@ -87,10 +85,3 @@ def delete_entry_by_name(self, name: str): logger.exception(f"Delete udf failed for name {name} with error {str(e)}") return False return True - - def get_all_entries(self) -> List[UdfCatalogEntry]: - try: - objs = self.model.query.all() - return [obj.as_dataclass() for obj in objs] - except NoResultFound: - return [] diff --git a/eva/udfs/udf_bootstrap_queries.py b/eva/udfs/udf_bootstrap_queries.py index 094af91bc3..805fd7399e 100644 --- a/eva/udfs/udf_bootstrap_queries.py +++ b/eva/udfs/udf_bootstrap_queries.py @@ -179,7 +179,6 @@ def init_builtin_udfs(mode="debug"): ArrayCount_udf_query, Timestamp_udf_query, Crop_udf_query, - YoloV5_udf_query, Open_udf_query, Similarity_udf_query # Disabled because required packages (eg., easy_ocr might not be preinstalled) @@ -196,5 +195,8 @@ def init_builtin_udfs(mode="debug"): ] ) + if mode != "minimal": + queries.extend([YoloV5_udf_query]) + for query in queries: execute_query_fetch_all(query) diff --git a/test/benchmark_tests/conftest.py b/test/benchmark_tests/conftest.py index d8bccfdedd..79f4b4fa48 100644 --- a/test/benchmark_tests/conftest.py +++ b/test/benchmark_tests/conftest.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from test.util import load_inbuilt_udfs +from test.util import load_udfs_for_testing import pytest @@ -25,5 +25,8 @@ def setup_pytorch_tests(): CatalogManager().reset() execute_query_fetch_all("LOAD VIDEO 'data/ua_detrac/ua_detrac.mp4' INTO MyVideo;") execute_query_fetch_all("LOAD VIDEO 'data/mnist/mnist.mp4' INTO MNIST;") - load_inbuilt_udfs() + load_udfs_for_testing() + from eva.udfs.udf_bootstrap_queries import YoloV5_udf_query + + execute_query_fetch_all(YoloV5_udf_query) yield None diff --git a/test/catalog/services/test_udf_catalog_service.py b/test/catalog/services/test_udf_catalog_service.py index 907534cd92..1bf6e7232d 100644 --- a/test/catalog/services/test_udf_catalog_service.py +++ b/test/catalog/services/test_udf_catalog_service.py @@ -70,16 +70,18 @@ def test_udf_drop_by_name(self, mocked): @patch("eva.catalog.services.udf_catalog_service.UdfCatalog") def test_udf_catalog_exception(self, mock_udf_catalog): - mock_udf_catalog.query.filter.side_effect = Exception("filter_error") + mock_udf_catalog.query.filter.side_effect = NoResultFound mock_udf_catalog.query.all.side_effect = NoResultFound service = UdfCatalogService() - with self.assertRaises(Exception): - service.get_entry_by_name(MagicMock()) + result = None + result = service.get_entry_by_name(MagicMock()) + self.assertEqual(result, None) - with self.assertRaises(Exception): - service.get_entry_by_id(MagicMock()) + result = None + result = service.get_entry_by_id(MagicMock()) + self.assertEqual(result, None) self.assertFalse(service.delete_entry_by_name(MagicMock())) diff --git a/test/catalog/services/test_udf_io_catalog_service.py b/test/catalog/services/test_udf_io_catalog_service.py index cd836e7d7b..693067a018 100644 --- a/test/catalog/services/test_udf_io_catalog_service.py +++ b/test/catalog/services/test_udf_io_catalog_service.py @@ -24,7 +24,7 @@ UDF_ID = 123 -class UdfCatalogServiceTest(TestCase): +class UdfIOCatalogServiceTest(TestCase): 
@patch("eva.catalog.services.udf_io_catalog_service.UdfIOCatalog")
def test_get_all_entries_should_raise(self, mock):
    """get_all_entries is expected to propagate a generic query failure.

    The patched model's ``query.all`` is made to raise ``Exception("error")``
    and the test asserts that the same error surfaces from the service.
    """
    service = UdfIOCatalogService()
    mock.query.all.side_effect = Exception("error")
    # Only the raising call belongs inside the context manager: any
    # statement placed after it in the ``with`` body would never execute.
    with self.assertRaises(Exception) as cm:
        service.get_all_entries()
    self.assertEqual("error", str(cm.exception))
class SQLAlchemyTests(unittest.TestCase):
    """Sanity checks on the size of the tables behind the catalog."""

    def test_sqlalchemy_verify_catalog_tables(self):
        """Check that catalog tables stay small in columns and in rows.

        Tables with two or fewer columns are skipped (the original comment
        identifies these as video tables). Every other table must have fewer
        than 10 columns, and every ``BaseService`` subclass must report fewer
        than 100 entries from ``get_all_entries()``.
        """
        sql_session = SQLConfig().session
        engine = sql_session.get_bind()
        insp = inspect(engine)
        table_names = insp.get_table_names()

        try:
            for table in table_names:
                column_infos = insp.get_columns(table)
                # Skip video tables
                if len(column_infos) <= 2:
                    continue
                print("\n" + table, end=" : ")
                self.assertLess(
                    len(column_infos), 10, f"{table} has too many columns"
                )
                for column_info in column_infos:
                    print(column_info["name"], end=" | ")

            for service_subclass in get_all_subclasses(BaseService):
                service = service_subclass()
                table_tuples = service.get_all_entries()
                # The message must be an argument of the assertion; written
                # after the closing parenthesis it is silently discarded.
                self.assertLess(
                    len(table_tuples),
                    100,
                    f"{service_subclass} table has too many tuples",
                )
        finally:
            # Always release the session, even when an assertion fails.
            sql_session.close()
import unittest -from test.util import NUM_FRAMES, create_sample_video, file_remove, load_inbuilt_udfs +from test.util import ( + NUM_FRAMES, + create_sample_video, + file_remove, + load_udfs_for_testing, +) import pandas as pd import pytest @@ -31,7 +36,7 @@ def setUpClass(cls): video_file_path = create_sample_video(NUM_FRAMES) load_query = f"LOAD VIDEO '{video_file_path}' INTO MyVideo;" execute_query_fetch_all(load_query) - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") @classmethod def tearDownClass(cls): diff --git a/test/integration_tests/test_create_index_executor.py b/test/integration_tests/test_create_index_executor.py index 75f69b6a82..2c0045a2e7 100644 --- a/test/integration_tests/test_create_index_executor.py +++ b/test/integration_tests/test_create_index_executor.py @@ -15,7 +15,7 @@ import os import unittest from pathlib import Path -from test.util import load_inbuilt_udfs +from test.util import load_udfs_for_testing import faiss import numpy as np @@ -53,8 +53,7 @@ def setUpClass(cls): # Reset catalog. CatalogManager().reset() - - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") # Create feature vector table and raw input table. feat1 = np.array([[0, 0, 0]]).astype(np.float32) diff --git a/test/integration_tests/test_delete_executor.py b/test/integration_tests/test_delete_executor.py index 35c9121707..94f93d9a45 100644 --- a/test/integration_tests/test_delete_executor.py +++ b/test/integration_tests/test_delete_executor.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from test.util import file_remove, load_inbuilt_udfs +from test.util import file_remove, load_udfs_for_testing import numpy as np import pytest @@ -32,8 +32,7 @@ def setUp(self): # Reset catalog. 
CatalogManager().reset() - - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") create_table_query = """ CREATE TABLE IF NOT EXISTS testDeleteOne diff --git a/test/integration_tests/test_explain_executor.py b/test/integration_tests/test_explain_executor.py index 4e3de498e3..3fa4e434b4 100644 --- a/test/integration_tests/test_explain_executor.py +++ b/test/integration_tests/test_explain_executor.py @@ -13,7 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from test.util import create_sample_video, create_table, file_remove, load_inbuilt_udfs +from test.util import ( + create_sample_video, + create_table, + file_remove, + load_udfs_for_testing, +) import pytest @@ -39,7 +44,7 @@ def setUpClass(cls): video_file_path = create_sample_video(NUM_FRAMES) load_query = f"LOAD VIDEO '{video_file_path}' INTO MyVideo;" execute_query_fetch_all(load_query) - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") cls.table1 = create_table("table1", 100, 3) cls.table2 = create_table("table2", 500, 3) cls.table3 = create_table("table3", 1000, 3) diff --git a/test/integration_tests/test_insert_executor.py b/test/integration_tests/test_insert_executor.py index 5d7317f6d5..de64fc46ba 100644 --- a/test/integration_tests/test_insert_executor.py +++ b/test/integration_tests/test_insert_executor.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import unittest -from test.util import create_sample_video, file_remove, load_inbuilt_udfs +from test.util import create_sample_video, file_remove, load_udfs_for_testing import numpy as np import pandas as pd @@ -37,8 +37,7 @@ def setUp(self): ); """ execute_query_fetch_all(query) - - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") def tearDown(self): file_remove("dummy.avi") diff --git a/test/integration_tests/test_mat_executor.py b/test/integration_tests/test_mat_executor.py index 223ea79f7a..05d213087d 100644 --- a/test/integration_tests/test_mat_executor.py +++ b/test/integration_tests/test_mat_executor.py @@ -17,7 +17,7 @@ DummyObjectDetector, create_sample_video, file_remove, - load_inbuilt_udfs, + load_udfs_for_testing, ) import pandas as pd @@ -42,7 +42,7 @@ def setUpClass(cls): execute_query_fetch_all(load_query) ua_detrac = f"{EVA_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4" execute_query_fetch_all(f"LOAD VIDEO '{ua_detrac}' INTO UATRAC;") - load_inbuilt_udfs() + load_udfs_for_testing() @classmethod def tearDownClass(cls): diff --git a/test/integration_tests/test_open.py b/test/integration_tests/test_open.py index 7dca132b2e..9d3ed7def1 100644 --- a/test/integration_tests/test_open.py +++ b/test/integration_tests/test_open.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from test.util import create_sample_image, file_remove, load_inbuilt_udfs +from test.util import create_sample_image, file_remove, load_udfs_for_testing import numpy as np import pandas as pd @@ -32,7 +32,7 @@ def setUp(self): CatalogManager().reset() ConfigurationManager() # Load built-in UDFs. - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") # Insert image path. 
self.img_path = create_sample_image() diff --git a/test/integration_tests/test_optimizer_rules.py b/test/integration_tests/test_optimizer_rules.py index 4c5d75791f..72247fca48 100644 --- a/test/integration_tests/test_optimizer_rules.py +++ b/test/integration_tests/test_optimizer_rules.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from test.util import get_physical_query_plan, load_inbuilt_udfs +from test.util import get_physical_query_plan, load_udfs_for_testing import pytest from mock import MagicMock, patch @@ -42,7 +42,7 @@ def setUpClass(cls): ua_detrac = f"{EVA_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4" execute_query_fetch_all(f"LOAD VIDEO '{ua_detrac}' INTO MyVideo;") execute_query_fetch_all(f"LOAD VIDEO '{ua_detrac}' INTO MyVideo2;") - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") @classmethod def tearDownClass(cls): diff --git a/test/integration_tests/test_pytorch.py b/test/integration_tests/test_pytorch.py index fb53d9863b..4256ad6a50 100644 --- a/test/integration_tests/test_pytorch.py +++ b/test/integration_tests/test_pytorch.py @@ -15,7 +15,7 @@ import os import unittest from test.markers import windows_skip_marker -from test.util import file_remove, load_inbuilt_udfs +from test.util import file_remove, load_udfs_for_testing import cv2 import numpy as np @@ -50,7 +50,7 @@ def setUpClass(cls): execute_query_fetch_all(f"LOAD VIDEO '{asl_actions}' INTO Asl_actions;") execute_query_fetch_all(f"LOAD IMAGE '{meme1}' INTO MemeImages;") execute_query_fetch_all(f"LOAD IMAGE '{meme2}' INTO MemeImages;") - load_inbuilt_udfs() + load_udfs_for_testing() @classmethod def tearDownClass(cls): diff --git a/test/integration_tests/test_select_executor.py b/test/integration_tests/test_select_executor.py index 2e2d5624fb..ce1beced6e 100644 --- a/test/integration_tests/test_select_executor.py +++ b/test/integration_tests/test_select_executor.py @@ -20,7 +20,7 @@ 
create_table, file_remove, get_logical_query_plan, - load_inbuilt_udfs, + load_udfs_for_testing, ) import numpy as np @@ -48,7 +48,7 @@ def setUpClass(cls): ua_detrac = f"{EVA_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4" load_query = f"LOAD VIDEO '{ua_detrac}' INTO DETRAC;" execute_query_fetch_all(load_query) - load_inbuilt_udfs() + load_udfs_for_testing() cls.table1 = create_table("table1", 100, 3) cls.table2 = create_table("table2", 500, 3) cls.table3 = create_table("table3", 1000, 3) diff --git a/test/integration_tests/test_similarity.py b/test/integration_tests/test_similarity.py index 65e1b2b3ad..d2dc39725c 100644 --- a/test/integration_tests/test_similarity.py +++ b/test/integration_tests/test_similarity.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest -from test.util import create_sample_image, load_inbuilt_udfs +from test.util import create_sample_image, load_udfs_for_testing import numpy as np import pandas as pd @@ -31,7 +31,7 @@ def setUp(self): CatalogManager().reset() # Prepare needed UDFs and data_col. - load_inbuilt_udfs() + load_udfs_for_testing(mode="minimal") self.img_path = create_sample_image() # Create base comparison table. diff --git a/test/optimizer/rules/test_rules.py b/test/optimizer/rules/test_rules.py index f6b31f9474..9fe80a6d39 100644 --- a/test/optimizer/rules/test_rules.py +++ b/test/optimizer/rules/test_rules.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
def load_udfs_for_testing(mode: str = "debug") -> None:
    """Load the built-in UDFs for a test run via ``init_builtin_udfs``.

    Args:
        mode: Passed through unchanged to ``init_builtin_udfs``; see the
            comment below for what each mode loads.
    """
    # DEBUG OR RELEASE MODE: ALL UDFs
    # MINIMAL MODE: NO YOLO
    init_builtin_udfs(mode=mode)