-
Notifications
You must be signed in to change notification settings - Fork 264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: reducing coverage loss #619
Changes from 161 commits
c9afc12
34dfbf7
9fa9857
ebe26d3
bc722dd
0e29858
c1a7864
5ac631a
3238e95
02a1d28
01181b5
562a7ca
9887732
d2a1a3d
f09c613
79a6168
5ce1991
91d7b06
0aac934
ee48803
fc2f243
343a4a2
121451f
5b47c15
8c75a5e
6772cd0
7a10d67
1a4204f
e96d3a4
640e7ed
cb50de3
533de9e
e8081e4
97bfac4
0127ad0
fc353c9
10fd83a
d559fe1
445a710
d246715
17247fe
8a2d5a3
365ae2f
88e74e3
0d4cde2
a8918a7
40bc613
5a2faac
5f4a802
e952beb
247b041
08420d1
536ae74
d30fcbd
31aa92d
db930a4
f76e856
0ae4cd7
c7c763e
2711582
86c0513
4fcc190
94d0ae0
0701eb8
b31ad91
2bf76ff
d72d778
d74791c
b783430
2450485
b78280e
166aa32
22d1386
ba6bda9
f9f4a2b
429e513
7e525a2
1afe38a
9744432
cb859db
acf36ce
768a8a2
32da4af
13c7710
ea2cee7
eb56fcc
ad339a6
db1f388
80bf59c
a740112
41c2051
9fe2f81
f37b92c
46e2106
35168e7
e16eb3c
101761e
6732aef
7f07e13
cb27222
f7f0274
463183c
b230ad9
6ae22c8
61f3ca4
9834ebf
51ad946
b9e5ea6
b9314fb
43c04a4
8e4a32f
1481442
3acbc20
3611298
f6b27a0
11eb92d
c1db3ce
25f48a7
562bcbd
01a1bd4
a1ec020
307d3d4
98733a6
80292fc
a80e44e
27a2a47
2494795
a128b98
7c4aec1
94b892b
b846e82
d1c2c1c
6d61eb1
c774038
5419356
4c22deb
93f6c58
49e6f97
1d82644
8bfa3d5
87dc9a9
d9f5f3d
20ef35d
734fa8a
162cc42
508e4a7
31dcd9a
7ac35d9
53c3fec
356310b
c5b6c77
03c9b50
212e001
1cda24c
bbff19b
c104ec3
36d1946
f15e0a3
bfb344d
7d89836
b586931
8f43895
a57f720
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,31 +90,30 @@ def _bind_create_index_statement(self, node: CreateIndexStatement): | |
# TODO: create index currently only works on TableInfo, but will extend later. | ||
assert node.table_ref.is_table_atom(), "Index can only be created on Tableinfo" | ||
|
||
if IndexType.is_faiss_index_type(node.index_type): | ||
if not node.udf_func: | ||
# Feature table type needs to be float32 numpy array. | ||
col_def = node.col_list[0] | ||
table_ref_obj = node.table_ref.table.table_obj | ||
col = [ | ||
col for col in table_ref_obj.columns if col.name == col_def.name | ||
][0] | ||
if not col.array_type == NdArrayType.FLOAT32: | ||
raise BinderError("Index input needs to be float32.") | ||
if not len(col.array_dimensions) == 2: | ||
raise BinderError("Index input needs to be 2 dimensional.") | ||
else: | ||
# Output of the UDF should be 2 dimension and float32 type. | ||
catalog_manager = CatalogManager() | ||
udf_obj = catalog_manager.get_udf_catalog_entry_by_name( | ||
node.udf_func.name | ||
) | ||
for output in udf_obj.outputs: | ||
if not output.array_type == NdArrayType.FLOAT32: | ||
raise BinderError("Index input needs to be float32.") | ||
if not len(output.array_dimensions) == 2: | ||
raise BinderError("Index input needs to be 2 dimensional.") | ||
assert IndexType.is_faiss_index_type( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious, if we support index type other than Faiss later, we still need to do it with if else right (assuming we have some index specific checks)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
node.index_type | ||
), "Index type {} is not supported.".format(node.index_type) | ||
|
||
if not node.udf_func: | ||
# Feature table type needs to be float32 numpy array. | ||
col_def = node.col_list[0] | ||
table_ref_obj = node.table_ref.table.table_obj | ||
col = [col for col in table_ref_obj.columns if col.name == col_def.name][0] | ||
assert ( | ||
col.array_type == NdArrayType.FLOAT32 | ||
), "Index input needs to be float32." | ||
assert len(col.array_dimensions) == 2 | ||
else: | ||
raise BinderError("Index type {} is not supported.".format(node.index_type)) | ||
# Output of the UDF should be 2 dimension and float32 type. | ||
catalog_manager = CatalogManager() | ||
udf_obj = catalog_manager.get_udf_catalog_entry_by_name(node.udf_func.name) | ||
for output in udf_obj.outputs: | ||
assert ( | ||
output.array_type == NdArrayType.FLOAT32 | ||
), "Index input needs to be float32." | ||
assert ( | ||
len(output.array_dimensions) == 2 | ||
), "Index input needs to be 2 dimensional." | ||
|
||
@bind.register(SelectStatement) | ||
def _bind_select_statement(self, node: SelectStatement): | ||
|
@@ -158,10 +157,9 @@ def _bind_create_mat_statement(self, node: CreateMaterializedViewStatement): | |
@bind.register(RenameTableStatement) | ||
def _bind_rename_table_statement(self, node: RenameTableStatement): | ||
self.bind(node.old_table_ref) | ||
if node.old_table_ref.table.table_obj.table_type == TableType.STRUCTURED_DATA: | ||
err_msg = "Rename not yet supported on structured data" | ||
logger.exception(err_msg) | ||
raise BinderError(err_msg) | ||
assert ( | ||
node.old_table_ref.table.table_obj.table_type != TableType.STRUCTURED_DATA | ||
), "Rename not yet supported on structured data" | ||
|
||
@bind.register(TableRef) | ||
def _bind_tableref(self, node: TableRef): | ||
|
@@ -230,14 +228,10 @@ def _bind_func_expr(self, node: FunctionExpression): | |
# Verify the consistency of the UDF. If the checksum of the UDF does not match | ||
# the one stored in the catalog, an error will be thrown and the user will be | ||
# asked to register the UDF again. | ||
if get_file_checksum(udf_obj.impl_file_path) != udf_obj.checksum: | ||
err_msg = ( | ||
f"UDF file {udf_obj.impl_file_path} has been modified from the " | ||
"registration. Please create a new UDF using the CREATE UDF command or " | ||
"UPDATE the existing one." | ||
) | ||
logger.error(err_msg) | ||
raise BinderError(err_msg) | ||
assert ( | ||
get_file_checksum(udf_obj.impl_file_path) == udf_obj.checksum | ||
), f"""UDF file {udf_obj.impl_file_path} has been modified from the | ||
registration. Please create a new UDF using the CREATE UDF command or UPDATE the existing one.""" | ||
|
||
try: | ||
node.function = load_udf_class_from_file( | ||
|
@@ -279,10 +273,6 @@ def _bind_func_expr(self, node: FunctionExpression): | |
] | ||
node.alias = Alias(node.alias.alias_name, output_aliases) | ||
|
||
if len(node.alias.col_names) != len(node.output_objs): | ||
err_msg = ( | ||
f"Expected {len(node.output_objs)} output columns for " | ||
f"{node.alias.alias_name}, got {len(node.alias.col_names)}." | ||
) | ||
logger.error(err_msg) | ||
raise BinderError(err_msg) | ||
assert len(node.alias.col_names) == len( | ||
node.output_objs | ||
), f"""Expected {len(node.output_objs)} output columns for {node.alias.alias_name}, got {len(node.alias.col_names)}.""" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,7 +40,6 @@ | |
from eva.parser.create_statement import ColumnDefinition | ||
from eva.parser.table_ref import TableInfo | ||
from eva.parser.types import FileFormatType | ||
from eva.utils.errors import CatalogError | ||
from eva.utils.generic_utils import generate_file_path, get_file_checksum | ||
from eva.utils.logging_manager import logger | ||
|
||
|
@@ -380,14 +379,17 @@ def create_and_insert_multimedia_table_catalog_entry( | |
Returns: | ||
TableCatalogEntry: newly inserted table catalog entry | ||
""" | ||
assert format_type in [ | ||
FileFormatType.VIDEO, | ||
FileFormatType.IMAGE, | ||
], f"Format Type {format_type} is not supported" | ||
|
||
if format_type is FileFormatType.VIDEO: | ||
columns = get_video_table_column_definitions() | ||
table_type = TableType.VIDEO_DATA | ||
elif format_type is FileFormatType.IMAGE: | ||
columns = get_image_table_column_definitions() | ||
table_type = TableType.IMAGE_DATA | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, removing this will result in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||
raise CatalogError(f"Format Type {format_type} is not supported") | ||
|
||
return self.create_and_insert_table_catalog_entry( | ||
TableInfo(name), columns, table_type=table_type | ||
|
@@ -407,10 +409,9 @@ def get_multimedia_metadata_table_catalog_entry( | |
# use file_url as the metadata table name | ||
media_metadata_name = Path(input_table.file_url).stem | ||
obj = self.get_table_catalog_entry(media_metadata_name) | ||
if not obj: | ||
err = f"Table with name {media_metadata_name} does not exist in catalog" | ||
logger.exception(err) | ||
raise CatalogError(err) | ||
assert ( | ||
obj is not None | ||
), f"Table with name {media_metadata_name} does not exist in catalog" | ||
|
||
return obj | ||
|
||
|
@@ -432,10 +433,7 @@ def create_and_insert_multimedia_metadata_table_catalog_entry( | |
# use file_url as the metadata table name | ||
media_metadata_name = Path(input_table.file_url).stem | ||
obj = self.get_table_catalog_entry(media_metadata_name) | ||
if obj: | ||
err_msg = f"Table with name {media_metadata_name} already exists" | ||
logger.exception(err_msg) | ||
raise CatalogError(err_msg) | ||
assert obj is None, "Table with name {media_metadata_name} already exists" | ||
|
||
columns = [ColumnDefinition("file_url", ColumnType.TEXT, None, None)] | ||
obj = self.create_and_insert_table_catalog_entry( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,9 +47,6 @@ class CreateIndexExecutor(AbstractExecutor): | |
def __init__(self, node: CreateIndexPlan): | ||
super().__init__(node) | ||
|
||
def validate(self): | ||
pass | ||
|
||
def exec(self): | ||
catalog_manager = CatalogManager() | ||
if catalog_manager.get_index_catalog_entry_by_name(self.node.name): | ||
|
@@ -60,10 +57,12 @@ def exec(self): | |
# Get the index type. | ||
index_type = self.node.index_type | ||
|
||
assert IndexType.is_faiss_index_type( | ||
index_type | ||
), "Index type {} is not supported.".format(index_type) | ||
|
||
if IndexType.is_faiss_index_type(index_type): | ||
self._create_faiss_index() | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Won't this result in saying the index is successfully created? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||
raise ExecutorError("Index type {} is not supported.".format(index_type)) | ||
|
||
yield Batch( | ||
pd.DataFrame( | ||
|
@@ -78,39 +77,6 @@ def _get_index_save_path(self): | |
/ Path("{}_{}.index".format(self.node.index_type, self.node.name)) | ||
) | ||
|
||
# Comment out since Index IO is not needed for now. | ||
# def _get_index_io_list(self, input_dim): | ||
# # Input dimension is inferred from the actual feature. | ||
# catalog_manager = CatalogManager() | ||
# input_index_io = catalog_manager.index_io( | ||
# "input_feature", | ||
# ColumnType.NDARRAY, | ||
# NdArrayType.FLOAT32, | ||
# [Dimension.ANYDIM, input_dim], | ||
# True, | ||
# ) | ||
|
||
# # Output dimension depends on number of searched | ||
# # feature vectors and top N similar feature vectors. | ||
# # IndexIO has detailed documentation about input and | ||
# # output format of index. | ||
# id_index_io = catalog_manager.index_io( | ||
# "logical_id", | ||
# ColumnType.NDARRAY, | ||
# NdArrayType.INT64, | ||
# [Dimension.ANYDIM, Dimension.ANYDIM], | ||
# False, | ||
# ) | ||
# distance_index_io = catalog_manager.index_io( | ||
# "distance", | ||
# ColumnType.NDARRAY, | ||
# NdArrayType.FLOAT32, | ||
# [Dimension.ANYDIM, Dimension.ANYDIM], | ||
# False, | ||
# ) | ||
|
||
# return [input_index_io, id_index_io, distance_index_io] | ||
|
||
def _create_faiss_index(self): | ||
try: | ||
catalog_manager = CatalogManager() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like
eva/catalog/catalog_manager.py
, shall we also replacelogger.execption and raise BinderError
with assert for consistency?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, done