diff --git a/eva/parser/lark_visitor/_table_sources.py b/eva/parser/lark_visitor/_table_sources.py index 08305931e2..8f6631ef8b 100644 --- a/eva/parser/lark_visitor/_table_sources.py +++ b/eva/parser/lark_visitor/_table_sources.py @@ -81,7 +81,6 @@ def table_source_item_with_sample(self, tree): sample_freq = self.visit(child) elif child.data == "sample_clause_with_type": sample_freq, sample_type = self.visit(child) - print(sample_freq, sample_type) elif child.data == "alias_clause": alias = self.visit(child) diff --git a/eva/readers/decord_reader.py b/eva/readers/decord_reader.py index a00c17669e..66cfb9f765 100644 --- a/eva/readers/decord_reader.py +++ b/eva/readers/decord_reader.py @@ -16,6 +16,7 @@ import cv2 import decord +import math from eva.constants import IFRAMES from eva.expression.abstract_expression import AbstractExpression @@ -41,6 +42,7 @@ def __init__( can be converted to ranges. Defaults to None. sampling_rate (int, optional): Set if the caller wants one frame every `sampling_rate` number of frames. For example, if `sampling_rate = 10`, it returns every 10th frame. If both `predicate` and `sampling_rate` are specified, `sampling_rate` is given precedence. + sampling_type (str, optional): Set as 'iframe' if caller want to sample on top on iframes only. """ self._predicate = predicate self._sampling_rate = sampling_rate or 1 @@ -60,7 +62,6 @@ def _read(self) -> Iterator[Dict]: if self._sampling_type == IFRAMES: iframes = video.get_key_indices() - print(iframes) idx = 0 for (begin, end) in range_list: while idx < len(iframes) and iframes[idx] < begin: @@ -83,7 +84,7 @@ def _read(self) -> Iterator[Dict]: yield { "id": frame_id, "data": frame, - "seconds": frame_id // video.get(cv2.CAP_PROP_FPS), + "seconds": math.floor(video.get_frame_timestamp(frame_id)[0]), } else: break diff --git a/test/integration_tests/test_select_executor.py b/test/integration_tests/test_select_executor.py index c20984c3fa..cd91b5e5ca 100644 --- a/test/integration_tests/test_select_executor.py +++ b/test/integration_tests/test_select_executor.py @@ -266,6 +266,17 @@ def test_select_and_sample(self): self.assertEqual(len(actual_batch), len(expected_batch[0])) self.assertEqual(actual_batch, expected_batch[0]) + def test_select_and_iframe_sample(self): + select_query = "SELECT id FROM MyVideo SAMPLE 7 'iframe' ORDER BY id;" + actual_batch = execute_query_fetch_all(select_query) + actual_batch.sort() + + expected_batch = list(create_dummy_batches(filters=range(0, NUM_FRAMES, 7))) + expected_batch[0] = expected_batch[0].project(["myvideo.id"]) + + self.assertEqual(len(actual_batch), len(expected_batch[0])) + self.assertEqual(actual_batch, expected_batch[0]) + def test_select_and_groupby_first(self): # groupby and orderby together not tested because groupby # only applies to video data which is already sorted diff --git a/test/readers/test_decord_reader.py b/test/readers/test_decord_reader.py new file mode 100644 index 0000000000..ad321e89c8 --- /dev/null +++ b/test/readers/test_decord_reader.py @@ -0,0 +1,103 @@ +# coding=utf-8 +# Copyright 2018-2022 EVA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import unittest +from test.util import ( + FRAME_SIZE, + NUM_FRAMES, + create_dummy_batches, + create_sample_video, + file_remove, + upload_dir_from_config, +) + +from eva.expression.abstract_expression import ExpressionType +from eva.expression.comparison_expression import ComparisonExpression +from eva.expression.constant_value_expression import ConstantValueExpression +from eva.expression.logical_expression import LogicalExpression +from eva.expression.tuple_value_expression import TupleValueExpression +from eva.readers.decord_reader import DecordReader + + +class DecordLoaderTest(unittest.TestCase): + def setUp(self): + create_sample_video() + + def tearDown(self): + file_remove("dummy.avi") + + def test_should_sample_every_k_frame(self): + for k in range(1, 10): + video_loader = DecordReader( + file_url=os.path.join(upload_dir_from_config, "dummy.avi"), + batch_mem_size=FRAME_SIZE * NUM_FRAMES, + sampling_rate=k, + ) + batches = list(video_loader.read()) + expected = list( + create_dummy_batches(filters=[i for i in range(0, NUM_FRAMES, k)]) + ) + self.assertTrue(batches, expected) + + def test_should_sample_every_k_frame_with_predicate(self): + col = TupleValueExpression("id") + val = ConstantValueExpression(NUM_FRAMES // 2) + predicate = ComparisonExpression( + ExpressionType.COMPARE_GEQ, left=col, right=val + ) + for k in range(2, 4): + video_loader = DecordReader( + file_url=os.path.join(upload_dir_from_config, "dummy.avi"), + batch_mem_size=FRAME_SIZE * NUM_FRAMES, + sampling_rate=k, + predicate=predicate, + ) + batches = list(video_loader.read()) + for batch in batches: + print(batch) + value = NUM_FRAMES // 2 + start = value + k - (value % k) if value % k else value + expected = list( + create_dummy_batches(filters=[i for i in range(start, NUM_FRAMES, k)]) + ) + self.assertTrue(batches, expected) + + value = 2 + predicate_1 = ComparisonExpression( + ExpressionType.COMPARE_GEQ, + left=TupleValueExpression("id"), + right=ConstantValueExpression(value), + ) + predicate_2 = ComparisonExpression( + ExpressionType.COMPARE_LEQ, + left=TupleValueExpression("id"), + right=ConstantValueExpression(8), + ) + predicate = LogicalExpression( + ExpressionType.LOGICAL_AND, predicate_1, predicate_2 + ) + for k in range(2, 4): + video_loader = DecordReader( + file_url=os.path.join(upload_dir_from_config, "dummy.avi"), + batch_mem_size=FRAME_SIZE * NUM_FRAMES, + sampling_rate=k, + predicate=predicate, + ) + batches = list(video_loader.read()) + start = value + k - (value % k) if value % k else value + expected = list( + create_dummy_batches(filters=[i for i in range(start, 8, k)]) + ) + self.assertTrue(batches, expected)