chore: reducing coverage loss #619
Changes from 157 commits
eva/catalog/catalog_manager.py

```diff
@@ -40,7 +40,6 @@
 from eva.parser.create_statement import ColumnDefinition
 from eva.parser.table_ref import TableInfo
 from eva.parser.types import FileFormatType
-from eva.utils.errors import CatalogError
 from eva.utils.generic_utils import generate_file_path, get_file_checksum
 from eva.utils.logging_manager import logger
```
```diff
@@ -380,14 +379,17 @@ def create_and_insert_multimedia_table_catalog_entry(
         Returns:
             TableCatalogEntry: newly inserted table catalog entry
         """
+        assert format_type in [
+            FileFormatType.VIDEO,
+            FileFormatType.IMAGE,
+        ], f"Format Type {format_type} is not supported"
+
         if format_type is FileFormatType.VIDEO:
             columns = get_video_table_column_definitions()
             table_type = TableType.VIDEO_DATA
         elif format_type is FileFormatType.IMAGE:
             columns = get_image_table_column_definitions()
             table_type = TableType.IMAGE_DATA
-        else:
-            raise CatalogError(f"Format Type {format_type} is not supported")
 
         return self.create_and_insert_table_catalog_entry(
             TableInfo(name), columns, table_type=table_type
```

Reviewer (on the removed `else` branch): So, removing this will result in …
Author: Added
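To make the control flow concrete, here is a minimal standalone sketch of the pattern this hunk adopts (simplified, hypothetical names, not the actual EVA classes): the precondition assert rejects unsupported formats up front, so the if/elif chain no longer needs a trailing `else: raise` branch that tests rarely reach.

```python
from enum import Enum, auto


class FileFormatType(Enum):
    VIDEO = auto()
    IMAGE = auto()
    CSV = auto()


def resolve_table_type(format_type: FileFormatType) -> str:
    # The precondition assert replaces an `else: raise` branch, removing
    # a hard-to-exercise branch from the coverage report.
    assert format_type in [
        FileFormatType.VIDEO,
        FileFormatType.IMAGE,
    ], f"Format Type {format_type} is not supported"

    if format_type is FileFormatType.VIDEO:
        return "VIDEO_DATA"
    # The assert guarantees IMAGE is the only remaining case.
    return "IMAGE_DATA"


try:
    resolve_table_type(FileFormatType.CSV)
except AssertionError as e:
    print(e)  # Format Type FileFormatType.CSV is not supported
```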
```diff
@@ -407,10 +409,9 @@ def get_multimedia_metadata_table_catalog_entry(
         # use file_url as the metadata table name
         media_metadata_name = Path(input_table.file_url).stem
         obj = self.get_table_catalog_entry(media_metadata_name)
-        if not obj:
-            err = f"Table with name {media_metadata_name} does not exist in catalog"
-            logger.exception(err)
-            raise CatalogError(err)
+        assert (
+            obj is not None
+        ), f"Table with name {media_metadata_name} does not exist in catalog"
 
         return obj
```
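A note on the parenthesized form used here: the parentheses must wrap only the condition. Wrapping the condition and the message together turns the assert into a test of a non-empty tuple, which is always truthy, so the assert can never fire (CPython emits a SyntaxWarning for this). A quick illustration:

```python
obj = None

# Correct: parentheses wrap only the condition; this fails as expected.
try:
    assert (
        obj is not None
    ), "Table does not exist in catalog"
except AssertionError as e:
    print(e)  # Table does not exist in catalog

# Pitfall: a two-element tuple is always truthy, so this never fires.
assert (obj is not None, "Table does not exist in catalog")  # SyntaxWarning
```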
```diff
@@ -432,10 +433,7 @@ def create_and_insert_multimedia_metadata_table_catalog_entry(
         # use file_url as the metadata table name
         media_metadata_name = Path(input_table.file_url).stem
         obj = self.get_table_catalog_entry(media_metadata_name)
-        if obj:
-            err_msg = f"Table with name {media_metadata_name} already exists"
-            logger.exception(err_msg)
-            raise CatalogError(err_msg)
+        assert obj is None, f"Table with name {media_metadata_name} already exists"
 
         columns = [ColumnDefinition("file_url", ColumnType.TEXT, None, None)]
         obj = self.create_and_insert_table_catalog_entry(
```
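These assertion messages rely on f-strings; forgetting the `f` prefix prints the braces literally instead of interpolating the table name. A quick runnable illustration:

```python
name = "dog_images"

try:
    # Without the f prefix the braces are printed literally.
    assert False, "Table with name {name} already exists"
except AssertionError as e:
    print(e)  # Table with name {name} already exists

try:
    # With the f prefix the message interpolates as intended.
    assert False, f"Table with name {name} already exists"
except AssertionError as e:
    print(e)  # Table with name dog_images already exists
```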
eva/executor/create_index_executor.py
```diff
@@ -47,9 +47,6 @@ class CreateIndexExecutor(AbstractExecutor):
     def __init__(self, node: CreateIndexPlan):
         super().__init__(node)
 
-    def validate(self):
-        pass
-
     def exec(self):
         catalog_manager = CatalogManager()
         if catalog_manager.get_index_catalog_entry_by_name(self.node.name):
```
```diff
@@ -60,10 +57,12 @@ def exec(self):
         # Get the index type.
         index_type = self.node.index_type
 
-        if IndexType.is_faiss_index_type(index_type):
-            self._create_faiss_index()
-        else:
-            raise ExecutorError("Index type {} is not supported.".format(index_type))
+        assert IndexType.is_faiss_index_type(
+            index_type
+        ), "Index type {} is not supported.".format(index_type)
+
+        self._create_faiss_index()
 
         yield Batch(
             pd.DataFrame(
```

Reviewer (on the removed `else` branch): Won't this result in saying the index is successfully created?
Author: Added
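Reduced to a standalone sketch (hypothetical names, not the EVA API), the shape of the change is an assert guard ahead of the success message inside a generator:

```python
from typing import Iterator


def create_index(index_type: str) -> Iterator[str]:
    # The guard runs when the generator is first advanced, before the
    # success message is yielded, so an unsupported type can no longer
    # fall through to a "success" result.
    assert index_type == "FAISS", f"Index type {index_type} is not supported."
    yield "Index successfully added."


print(next(create_index("FAISS")))  # Index successfully added.
# next(create_index("BTREE"))      # raises AssertionError
```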
```diff
@@ -78,39 +77,6 @@ def _get_index_save_path(self):
             / Path("{}_{}.index".format(self.node.index_type, self.node.name))
         )
 
-    # Comment out since Index IO is not needed for now.
-    # def _get_index_io_list(self, input_dim):
-    #     # Input dimension is inferred from the actual feature.
-    #     catalog_manager = CatalogManager()
-    #     input_index_io = catalog_manager.index_io(
-    #         "input_feature",
-    #         ColumnType.NDARRAY,
-    #         NdArrayType.FLOAT32,
-    #         [Dimension.ANYDIM, input_dim],
-    #         True,
-    #     )
-
-    #     # Output dimension depends on number of searched
-    #     # feature vectors and top N similar feature vectors.
-    #     # IndexIO has detailed documentation about input and
-    #     # output format of index.
-    #     id_index_io = catalog_manager.index_io(
-    #         "logical_id",
-    #         ColumnType.NDARRAY,
-    #         NdArrayType.INT64,
-    #         [Dimension.ANYDIM, Dimension.ANYDIM],
-    #         False,
-    #     )
-    #     distance_index_io = catalog_manager.index_io(
-    #         "distance",
-    #         ColumnType.NDARRAY,
-    #         NdArrayType.FLOAT32,
-    #         [Dimension.ANYDIM, Dimension.ANYDIM],
-    #         False,
-    #     )
-
-    #     return [input_index_io, id_index_io, distance_index_io]
-
     def _create_faiss_index(self):
         try:
             catalog_manager = CatalogManager()
```
eva/executor/delete_executor.py
```diff
@@ -12,18 +12,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Generator, Iterator
+from typing import Iterator
 
 import pandas as pd
 
 from eva.catalog.catalog_manager import CatalogManager
 from eva.catalog.catalog_type import TableType
 from eva.executor.abstract_executor import AbstractExecutor
-from eva.executor.executor_utils import ExecutorError, apply_predicate
+from eva.executor.executor_utils import apply_predicate
 from eva.models.storage.batch import Batch
 from eva.plan_nodes.project_plan import ProjectPlan
 from eva.storage.storage_engine import StorageEngine
-from eva.utils.logging_manager import logger
 
 
 class DeleteExecutor(AbstractExecutor):
```
```diff
@@ -34,53 +33,35 @@ def __init__(self, node: ProjectPlan):
         self.predicate = node.where_clause
         self.catalog = CatalogManager()
 
-    def validate(self):
-        pass
-
     def exec(self, **kwargs) -> Iterator[Batch]:
-        try:
-            table_catalog = self.node.table_ref.table.table_obj
-            storage_engine = StorageEngine.factory(table_catalog)
-
-            del_batch = Batch()
-
-            if table_catalog.table_type == TableType.VIDEO_DATA:
-                raise NotImplementedError("DELETE only implemented for structured data")
-            elif table_catalog.table_type == TableType.IMAGE_DATA:
-                raise NotImplementedError("DELETE only implemented for structured data")
-            elif table_catalog.table_type == TableType.STRUCTURED_DATA:
-                del_batch = storage_engine.read(table_catalog)
-                del_batch = list(del_batch)[0]
-
-            # Added because of inconsistency in col_alias in Structured data Batch project function
-            original_column_names = list(del_batch.frames.columns)
-            column_names = [
-                f"{table_catalog.name.lower()}.{name}"
-                for name in original_column_names
-                if not name == "_row_id"
-            ]
-            column_names.insert(0, "_row_id")
-            del_batch.frames.columns = column_names
-            del_batch = apply_predicate(del_batch, self.predicate)
-
-            # All the batches that need to be deleted
-
-            if table_catalog.table_type == TableType.VIDEO_DATA:
-                storage_engine.delete(table_catalog, del_batch)
-            elif table_catalog.table_type == TableType.IMAGE_DATA:
-                storage_engine.delete(table_catalog, del_batch)
-            elif table_catalog.table_type == TableType.STRUCTURED_DATA:
-                del_batch.frames.columns = original_column_names
-                table_needed = del_batch.frames[
-                    [f"{self.predicate.children[0].col_name}"]
-                ]
-                for num in range(len(del_batch)):
-                    storage_engine.delete(table_catalog, table_needed.iloc[num])
-                yield Batch(pd.DataFrame(["Deleted row"]))
-
-        except Exception as e:
-            logger.error(e)
-            raise ExecutorError(e)
-
-    def __call__(self, **kwargs) -> Generator[Batch, None, None]:
-        yield from self.exec(**kwargs)
+        table_catalog = self.node.table_ref.table.table_obj
+        storage_engine = StorageEngine.factory(table_catalog)
+        del_batch = Batch()
+
+        assert (
+            table_catalog.table_type == TableType.STRUCTURED_DATA
+        ), "DELETE only implemented for structured data"
+
+        del_batch = storage_engine.read(table_catalog)
+        del_batch = list(del_batch)[0]
+
+        # Added because of inconsistency in col_alias in Structured data Batch project function
+        original_column_names = list(del_batch.frames.columns)
+        column_names = [
+            f"{table_catalog.name.lower()}.{name}"
+            for name in original_column_names
+            if not name == "_row_id"
+        ]
+        column_names.insert(0, "_row_id")
+        del_batch.frames.columns = column_names
+        del_batch = apply_predicate(del_batch, self.predicate)
+
+        # All the batches that need to be deleted
+
+        if table_catalog.table_type == TableType.STRUCTURED_DATA:
+            del_batch.frames.columns = original_column_names
+            table_needed = del_batch.frames[[f"{self.predicate.children[0].col_name}"]]
+            for num in range(len(del_batch)):
+                storage_engine.delete(table_catalog, table_needed.iloc[num])
+
+        yield Batch(pd.DataFrame(["Deleted row"]))
```

Reviewer (on the remaining `if` check): We do not need this if check, the previous …
Author: Yes, resolved.
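The column-aliasing round trip in `exec` can be followed more easily in a standalone sketch using plain pandas (the table and predicate names below are illustrative, not the EVA API): columns are qualified as `table.col` so the predicate can resolve against the frame, then restored before rows are handed to the storage engine.

```python
import pandas as pd

frames = pd.DataFrame({"_row_id": [0, 1, 2], "id": [10, 20, 30]})
original_column_names = list(frames.columns)

# Qualify every column except _row_id so a predicate like
# "mytable.id = 20" can resolve against the frame.
table_name = "mytable"
column_names = ["_row_id"] + [
    f"{table_name}.{name}" for name in original_column_names if name != "_row_id"
]
frames.columns = column_names

# apply_predicate would filter here; emulate "mytable.id = 20".
frames = frames[frames["mytable.id"] == 20]

# Restore the unqualified names before handing rows to the storage engine.
frames.columns = original_column_names
print(frames)  # one matching row: _row_id 1, id 20
```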
Reviewer: Like eva/catalog/catalog_manager.py, shall we also replace `logger.exception` and `raise BinderError` with assert for consistency?
Author: Yes, done.
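On the coverage rationale behind these changes: an `if ... raise` error path is a separate branch that stays uncovered unless a test triggers it, while the assert style collapses it into a single statement that a one-line test can exercise. A sketch of such a test, assuming pytest and an illustrative helper (not the actual EVA code):

```python
import pytest


def get_table(catalog: dict, name: str) -> str:
    # Mirrors the assert-style lookup adopted in the catalog manager.
    obj = catalog.get(name)
    assert obj is not None, f"Table with name {name} does not exist in catalog"
    return obj


def test_missing_table_asserts():
    # A single test case covers the failure path of the assert.
    with pytest.raises(AssertionError, match="does not exist in catalog"):
        get_table({}, "unknown_table")
```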