From 8372e705bb54faba8df54a47413a03c193bef4ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9E=D0=BB=D0=B5=D0=B3?= <150132506+iddqdex@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:13:18 +0300 Subject: [PATCH] check tables size before run bencmarks (#10317) --- ydb/tests/olap/load/conftest.py | 54 ++++++++++- ydb/tests/olap/load/test_clickbench.py | 25 ++--- ydb/tests/olap/load/test_tpcds.py | 94 ++++++++++++++++++- ydb/tests/olap/load/test_tpch.py | 50 +++++++++- ydb/tests/olap/load/ya.make | 1 + .../scenario/helpers/scenario_tests_helper.py | 6 +- 6 files changed, 194 insertions(+), 36 deletions(-) diff --git a/ydb/tests/olap/load/conftest.py b/ydb/tests/olap/load/conftest.py index 3fc6254418ad..bda46d83b70d 100644 --- a/ydb/tests/olap/load/conftest.py +++ b/ydb/tests/olap/load/conftest.py @@ -5,6 +5,7 @@ from ydb.tests.olap.lib.ydb_cli import YdbCliHelper, WorkloadType from ydb.tests.olap.lib.allure_utils import allure_test_description from ydb.tests.olap.lib.results_processor import ResultsProcessor +from ydb.tests.olap.scenario.helpers.scenario_tests_helper import ScenarioTestHelper from time import time from typing import Optional from allure_commons._core import plugin_manager @@ -45,6 +46,24 @@ def _get_timeout(cls, query_num: int) -> float: def _test_name(cls, query_num: int) -> str: return f'Query{query_num:02d}' + @allure.step('check tables size') + def check_tables_size(self, folder: Optional[str], tables: dict[str, int]): + sth = ScenarioTestHelper(None) + errors: list[str] = [] + for table, expected_size in tables.items(): + if folder is None: + table_full = table + elif folder.endswith('/') or table.startswith('/'): + table_full = f'{folder}{table}' + else: + table_full = f'{folder}/{table}' + size = sth.get_table_rows_count(table_full) + if size != expected_size: + errors.append(f'table `{table}`: expect {expected_size}, but actually is {size};') + if len(errors) > 0: + msg = "\n".join(errors) + pytest.fail(f'Unexpected tables size in `{folder}`:\n {msg}') + def process_query_result(self, result: YdbCliHelper.WorkloadRunResult, query_num: int, iterations: int, upload: bool): def _get_duraton(stats, field): if stats is None: @@ -131,12 +150,37 @@ def _attach_plans(plan: YdbCliHelper.QueryPlan) -> None: exc = exc.with_traceback(result.traceback) raise exc + def setup_class(self) -> None: + if not hasattr(self, 'do_setup_class'): + return + error = None + tb = None + start_time = time() + try: + self.do_setup_class(self) + except BaseException as e: + error = str(e) + tb = e.__traceback__ + ResultsProcessor.upload_results( + kind='Load', + suite=self.suite, + test='_Verification', + timestamp=start_time, + is_successful=(error is None) + ) + if error is not None: + exc = pytest.fail.Exception(error) + exc.with_traceback(tb) + raise exc + def run_workload_test(self, path: str, query_num: int) -> None: - allure_listener = next(filter(lambda x: isinstance(x, AllureListener), plugin_manager.get_plugin_manager().get_plugins())) - allure_test_result = allure_listener.allure_logger.get_test(None) - query_num_param = next(filter(lambda x: x.name == 'query_num', allure_test_result.parameters), None) - if query_num_param: - query_num_param.mode = allure.parameter_mode.HIDDEN.value + for plugin in plugin_manager.get_plugin_manager().get_plugins(): + if isinstance(plugin, AllureListener): + allure_test_result = plugin.allure_logger.get_test(None) + if allure_test_result is not None: + for param in allure_test_result.parameters: + if param.name == 'query_num': + param.mode = allure.parameter_mode.HIDDEN.value start_time = time() result = YdbCliHelper.workload_run( path=path, diff --git a/ydb/tests/olap/load/test_clickbench.py b/ydb/tests/olap/load/test_clickbench.py index f2cb2fe320f9..d7b68565798a 100644 --- a/ydb/tests/olap/load/test_clickbench.py +++ b/ydb/tests/olap/load/test_clickbench.py @@ -1,9 +1,7 @@ import allure import pytest -from time import time from conftest import LoadSuiteBase from os import getenv -from ydb.tests.olap.lib.results_processor import ResultsProcessor from ydb.tests.olap.lib.ydb_cli import WorkloadType, YdbCliHelper from ydb.tests.olap.lib.ydb_cluster import YdbCluster from ydb.tests.olap.lib.utils import get_external_param @@ -13,19 +11,20 @@ class TestClickbench(LoadSuiteBase): suite = 'Clickbench' workload_type: WorkloadType = WorkloadType.Clickbench refference: str = 'CH.60' + path = get_external_param('table-path-clickbench', f'{YdbCluster.tables_path}/clickbench/hits') - def setup_class(self): + def do_setup_class(self): if getenv('NO_VERIFY_DATA', '0') == '1' or getenv('NO_VERIFY_DATA_CLICKBECNH', '0') == '1': return - root_path = YdbCluster.tables_path - path = get_external_param('table-path-clickbench', f'{root_path}/clickbench/hits') + + self.check_tables_size(folder=None, tables={'clickbench/hits': 99997497}) + fail_count = 0 - start_time = time() for query_num in range(0, 43): try: with allure.step(f'request {query_num}'): result = YdbCliHelper.workload_run( - path=path, + path=self.path, query_num=query_num, iterations=1, workload_type=self.workload_type, @@ -36,19 +35,9 @@ def setup_class(self): except BaseException: fail_count += 1 - test = '_Verification' - ResultsProcessor.upload_results( - kind='Load', - suite=self.suite, - test=test, - timestamp=start_time, - is_successful=(fail_count == 0) - ) if fail_count > 0: pytest.fail(f'{fail_count} verification queries failed') @pytest.mark.parametrize('query_num', [i for i in range(0, 43)]) def test_clickbench(self, query_num): - root_path = YdbCluster.tables_path - path = get_external_param(f'table-path-{self.suite}', f'{root_path}/clickbench/hits') - self.run_workload_test(path, query_num) + self.run_workload_test(self.path, query_num) diff --git a/ydb/tests/olap/load/test_tpcds.py b/ydb/tests/olap/load/test_tpcds.py index 63d82ff78db7..b1724a3a96dc 100644 --- a/ydb/tests/olap/load/test_tpcds.py +++ b/ydb/tests/olap/load/test_tpcds.py @@ -1,30 +1,94 @@ +from __future__ import annotations import pytest from conftest import LoadSuiteBase +from os import getenv from ydb.tests.olap.lib.ydb_cli import WorkloadType from ydb.tests.olap.lib.utils import get_external_param from ydb.tests.olap.lib.ydb_cluster import YdbCluster class TpcdsSuiteBase(LoadSuiteBase): - size: int = None + size: int = 0 workload_type: WorkloadType = WorkloadType.TPC_DS iterations: int = 3 + tables_size: dict[str, int] = {} + + def _get_tables_size(self) -> dict[str, int]: + result: dict[str, int] = { + 'customer_demographics': 1920800, + 'date_dim': 73049, + 'household_demographics': 7200, + 'income_band': 20, + 'ship_mode': 20, + 'time_dim': 86400, + } + result.update(self.tables_size) + return result + + def _get_path(self, full: bool = True) -> str: + if full: + tpcds_path = get_external_param('table-path-tpcds', f'{YdbCluster.tables_path}/tpcds') + else: + tpcds_path = 'tpcds' + return get_external_param(f'table-path-{self.suite}', f'{tpcds_path}/s{self.size}') + + def do_setup_class(self): + if getenv('NO_VERIFY_DATA', '0') == '1' or getenv('NO_VERIFY_DATA_TPCH', '0') == '1' or getenv(f'NO_VERIFY_DATA_TPCH_{self.size}'): + return + self.check_tables_size(self, folder=self._get_path(self, False), tables=self._get_tables_size(self)) @pytest.mark.parametrize('query_num', [i for i in range(1, 100)]) def test_tpcds(self, query_num: int): - root_path = YdbCluster.tables_path - tpcds_path = get_external_param('table-path-tpcds', f'{root_path}/tpcds') - path = get_external_param(f'table-path-{self.suite}', f'{tpcds_path}/s{self.size}') - self.run_workload_test(path, query_num) + self.run_workload_test(self._get_path(), query_num) class TestTpcds1(TpcdsSuiteBase): size: int = 1 + tables_size: dict[str, int] = { + 'call_center': 6, + 'catalog_page': 11718, + 'catalog_returns': 144067, + 'catalog_sales': 1441548, + 'customer_address': 50000, + 'customer': 100000, + 'inventory': 11745000, + 'item': 18000, + 'promotion': 300, + 'reason': 35, + 'store': 12, + 'store_returns': 287514, + 'store_sales': 2880404, + 'warehouse': 5, + 'web_page': 60, + 'web_returns': 71763, + 'web_sales': 719384, + 'web_site': 30, + } class TestTpcds10(TpcdsSuiteBase): size: int = 10 timeout = max(TpcdsSuiteBase.timeout, 300.) + tables_size: dict[str, int] = { + 'call_center': 24, + 'catalog_page': 12000, + 'catalog_returns': 1439749, + 'catalog_sales': 14401261, + 'customer': 500000, + 'customer_address': 250000, + 'inventory': 133110000, + 'item': 102000, + 'promotion': 500, + 'reason': 45, + 'store': 102, + 'store_returns': 2875432, + 'store_sales': 28800991, + 'warehouse': 10, + 'web_page': 200, + 'web_returns': 719217, + 'web_sales': 7197566, + 'web_site': 42, + } class TestTpcds100(TpcdsSuiteBase): @@ -35,6 +99,26 @@ class TestTpcds100(TpcdsSuiteBase): 14: LoadSuiteBase.QuerySettings(timeout=max(TpcdsSuiteBase.timeout, 7200.)), 72: LoadSuiteBase.QuerySettings(timeout=max(TpcdsSuiteBase.timeout, 7200.)), } + tables_size: dict[str, int] = { + 'call_center': 30, + 'catalog_page': 20400, + 'catalog_returns': 14404374, + 'catalog_sales': 143997065, + 'customer': 2000000, + 'customer_address': 1000000, + 'inventory': 399330000, + 'item': 204000, + 'promotion': 1000, + 'reason': 55, + 'store': 402, + 'store_returns': 28795080, + 'store_sales': 287997024, + 'warehouse': 15, + 'web_page': 2040, + 'web_returns': 7197670, + 'web_sales': 72001237, + 'web_site': 24, + } class TestTpcds1000(TpcdsSuiteBase): diff --git a/ydb/tests/olap/load/test_tpch.py b/ydb/tests/olap/load/test_tpch.py index 17d3fecd4d06..d8f31e9bf3f8 100644 --- a/ydb/tests/olap/load/test_tpch.py +++ b/ydb/tests/olap/load/test_tpch.py @@ -1,41 +1,81 @@ +from __future__ import annotations import pytest from conftest import LoadSuiteBase +from os import getenv from ydb.tests.olap.lib.ydb_cli import WorkloadType from ydb.tests.olap.lib.utils import get_external_param from ydb.tests.olap.lib.ydb_cluster import YdbCluster class TpchSuiteBase(LoadSuiteBase): - size: int = None + size: int = 0 workload_type: WorkloadType = WorkloadType.TPC_H iterations: int = 3 + tables_size: dict[str, int] = {} + + def _get_tables_size(self) -> dict[str, int]: + result: dict[str, int] = { + 'customer': 150000 * self.size, + 'nation': 25, + 'orders': 1500000 * self.size, + 'part': 200000 * self.size, + 'partsupp': 800000 * self.size, + 'region': 5, + 'supplier': 10000 * self.size, + } + result.update(self.tables_size) + return result + + def _get_path(self, full: bool = True) -> str: + if full: + tpch_path = get_external_param('table-path-tpch', f'{YdbCluster.tables_path}/tpch') + else: + tpch_path = 'tpch' + return get_external_param(f'table-path-{self.suite}', f'{tpch_path}/s{self.size}') + + def do_setup_class(self): + if getenv('NO_VERIFY_DATA', '0') == '1' or getenv('NO_VERIFY_DATA_TPCH', '0') == '1' or getenv(f'NO_VERIFY_DATA_TPCH_{self.size}'): + return + self.check_tables_size(self, folder=self._get_path(self, False), tables=self._get_tables_size(self)) @pytest.mark.parametrize('query_num', [i for i in range(1, 23)]) def test_tpch(self, query_num: int): - root_path = YdbCluster.tables_path - tpch_path = get_external_param('table-path-tpch', f'{root_path}/tpch') - path = get_external_param(f'table-path-{self.suite}', f'{tpch_path}/s{self.size}') - self.run_workload_test(path, query_num) + self.run_workload_test(self._get_path(), query_num) class TestTpch1(TpchSuiteBase): + tables_size: dict[str, int] = { + 'lineitem': 6001215, + } size: int = 1 class TestTpch10(TpchSuiteBase): + tables_size: dict[str, int] = { + 'lineitem': 59986052, + } size: int = 10 class TestTpch100(TpchSuiteBase): + tables_size: dict[str, int] = { + 'lineitem': 600037902, + } size: int = 100 timeout = max(TpchSuiteBase.timeout, 300.) class TestTpch1000(TpchSuiteBase): + tables_size: dict[str, int] = { + 'lineitem': 5999989709, + } size: int = 1000 timeout = max(TpchSuiteBase.timeout, 1000.) class TestTpch10000(TpchSuiteBase): + tables_size: dict[str, int] = { + 'lineitem': 59999994267, + } size: int = 10000 timeout = max(TpchSuiteBase.timeout, 3600.) diff --git a/ydb/tests/olap/load/ya.make b/ydb/tests/olap/load/ya.make index 4b1a30e67ca5..db4f47f31251 100644 --- a/ydb/tests/olap/load/ya.make +++ b/ydb/tests/olap/load/ya.make @@ -19,6 +19,7 @@ PY3TEST() contrib/python/allure-python-commons ydb/public/sdk/python/enable_v3_new_behavior ydb/tests/olap/lib + ydb/tests/olap/scenario/helpers library/python/testing/yatest_common ydb/public/sdk/python ) diff --git a/ydb/tests/olap/scenario/helpers/scenario_tests_helper.py b/ydb/tests/olap/scenario/helpers/scenario_tests_helper.py index 7b1441208434..5a6911de6e3f 100644 --- a/ydb/tests/olap/scenario/helpers/scenario_tests_helper.py +++ b/ydb/tests/olap/scenario/helpers/scenario_tests_helper.py @@ -7,7 +7,7 @@ import json from ydb.tests.olap.lib.ydb_cluster import YdbCluster from abc import abstractmethod, ABC -from typing import Set, List, Dict, Any, Callable +from typing import Set, List, Dict, Any, Callable, Optional from time import sleep @@ -223,7 +223,7 @@ def EOF(self) -> bool: pass - def __init__(self, context: TestContext) -> None: + def __init__(self, context: Optional[TestContext]) -> None: """Constructor. Args: @@ -256,7 +256,7 @@ def _add_not_empty(p: str, dir: str): @staticmethod def _run_with_expected_status( - operation: callable, + operation: Callable, expected_status: ydb.StatusCode | Set[ydb.StatusCode], retriable_status: ydb.StatusCode | Set[ydb.StatusCode] = {}, n_retries=0,