diff --git a/.github/licenserc.yml b/.github/licenserc.yml index eaf7a49eee4..b122b2c9775 100644 --- a/.github/licenserc.yml +++ b/.github/licenserc.yml @@ -25,6 +25,7 @@ header: - '**/LICENSE.TXT' - '**/cipher-file-256' - '**/asan.suppression' + - '**/tsan.suppression' - '**/LICENSE.TXT' - '**/LICENSE' - '**/README'
diff --git a/.github/workflows/license-checker.yml b/.github/workflows/license-checker.yml index e156c1b2b4c..2b23cca7eaa 100644 --- a/.github/workflows/license-checker.yml +++ b/.github/workflows/license-checker.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Check License Header - uses: apache/skywalking-eyes@main + uses: apache/skywalking-eyes@v0.3.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with:
diff --git a/.gitmodules b/.gitmodules index 8472d78404e..335e1dbd9c8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,3 +82,6 @@ [submodule "contrib/cpu_features"] path = contrib/cpu_features url = https://github.com/google/cpu_features +[submodule "contrib/arm-optimized-routines"] + path = contrib/arm-optimized-routines + url = https://github.com/ARM-software/optimized-routines
diff --git a/CMakeLists.txt b/CMakeLists.txt index f2ec9f3316b..2e33a127807 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,10 +239,8 @@ else () set (CMAKE_CXX_STANDARD_REQUIRED ON) endif () -if (NOT ARCH_ARM) - set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") - set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3") -endif () +set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") +set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3") option (DEBUG_WITHOUT_DEBUG_INFO "Set to ON to build dev target without debug info (remove flag `-g` in order to accelerate compiling speed and reduce target binary size)" OFF) if (DEBUG_WITHOUT_DEBUG_INFO)
diff --git a/README.md b/README.md index 02af727105b..ab996b6f3d6 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,30 @@ LSAN_OPTIONS=suppressions=$WORKSPACE/tiflash/test/sanitize/asan.suppression ## Run Integration Tests -TBD. +1. Build your own tiflash binary in $BUILD with `-DCMAKE_BUILD_TYPE=DEBUG`. +``` +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=DEBUG +ninja tiflash +``` +2. Run a TiDB cluster locally using tiup playground or other tools. +``` +tiup playground nightly --tiflash.binpath $BUILD/dbms/src/Server/tiflash +``` +3. Check $WORKSPACE/tests/_env.sh to make sure the port and the build directory are set correctly. +4. Run your integration tests using commands like "./run-test.sh fullstack-test2/ddl" under the $WORKSPACE/tests directory. + +## Run MicroBenchmark Tests + +To run microbenchmark tests, you need to build with -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON: + +```shell +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON +ninja bench_dbms +``` + +The microbenchmark executable is at `$BUILD/dbms/bench_dbms`; you can run it with `./bench_dbms` or `./bench_dbms --benchmark_filter=xxx`. For more usage, check `./bench_dbms --help`.
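For example, a filtered run might look like the sketch below; the benchmark name used here is hypothetical (use the list flag to see what is registered in your build), while `--benchmark_list_tests` and `--benchmark_filter` are standard Google Benchmark flags:

```shell
cd $BUILD/dbms
# List the registered benchmarks without running them.
./bench_dbms --benchmark_list_tests=true
# Run only the benchmarks whose names match a regex (name below is hypothetical).
./bench_dbms --benchmark_filter='WindowFunctionBench.*'
```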
## Generate LLVM Coverage Report
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 71f81ae3ee5..4520d1cb176 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -165,3 +165,7 @@ add_subdirectory(benchmark) set (BUILD_TESTING OFF CACHE BOOL "Disable cpu-features testing" FORCE) add_subdirectory(cpu_features) + +if (ARCH_AARCH64 AND ARCH_LINUX) + add_subdirectory(arm-optimized-routines-cmake) +endif ()
diff --git a/contrib/arm-optimized-routines b/contrib/arm-optimized-routines new file mode 160000 index 00000000000..e373f659523 --- /dev/null +++ b/contrib/arm-optimized-routines @@ -0,0 +1 @@ +Subproject commit e373f6595230087a8ddea449bfb14b47150b4059
diff --git a/contrib/arm-optimized-routines-cmake/CMakeLists.txt b/contrib/arm-optimized-routines-cmake/CMakeLists.txt new file mode 100644 index 00000000000..89baa7222f3 --- /dev/null +++ b/contrib/arm-optimized-routines-cmake/CMakeLists.txt @@ -0,0 +1,45 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This library overrides performance-critical routines for aarch64 targets. +# The implementations are imported from the official ARM repo. +# To reduce dispatching cost, the indirect function (ifunc) technique is utilized. Therefore, +# this library should only be enabled for ELF targets.

# Considerations: +# - As of June 2022, most enterprise OSs (CentOS 7, CentOS Stream 8 and RHEL 8) still +# use a relatively old glibc on ARM64, where ASIMD, MTE, DC ZVA and SVE are not +# fully utilized. However, it is becoming increasingly common to use ARM64 instances +# in cloud-native situations. +# - The `optimized-routines` repo is actively maintained by ARM. Therefore, its quality +# is assured, and using it also enables us to keep in sync with the latest +# acceleration techniques.
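To make the dispatch mechanism concrete before the build script and src/aor.c below, here is a minimal, self-contained sketch of the ELF indirect-function (ifunc) technique, assuming a Linux/aarch64 toolchain; the function names are hypothetical, and only the hwcap probing mirrors the real code:

```c
#include <stddef.h>
#include <sys/auxv.h>

#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22) /* fallback for older kernel headers */
#endif

/* Generic implementation, always safe on Armv8. */
static size_t my_strlen_generic(const char *s)
{
    size_t n = 0;
    while (s[n] != '\0')
        ++n;
    return n;
}

/* Stand-in for an SVE-optimized variant (identical here for brevity). */
static size_t my_strlen_sve(const char *s)
{
    return my_strlen_generic(s);
}

/* The resolver runs once, when the dynamic linker binds the symbol, so
 * ordinary calls pay no per-call dispatch cost afterwards. */
static size_t (*resolve_my_strlen(void))(const char *)
{
    return (getauxval(AT_HWCAP) & HWCAP_SVE) ? my_strlen_sve : my_strlen_generic;
}

/* `my_strlen` is bound to whichever implementation the resolver returns. */
size_t my_strlen(const char *s) __attribute__((ifunc("resolve_my_strlen")));
```

The DISPATCH macro in src/aor.c expands to this same pattern for each overridden routine, plus a hidden alias so that only calls from within the TiFlash binary are redirected.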
+ +set(CMAKE_C_FLAGS "") +ENABLE_LANGUAGE(C) +ENABLE_LANGUAGE(ASM) +set(TIFLASH_AOR_DIR ../arm-optimized-routines) + +file(GLOB TIFLASH_AARCH64_STRING_FILES ${TIFLASH_AOR_DIR}/string/aarch64/*.S) +add_library(tiflash-aarch64-string STATIC ${TIFLASH_AARCH64_STRING_FILES} src/aor.c) +target_compile_options(tiflash-aarch64-string PRIVATE -march=armv8-a+sve) +target_include_directories(tiflash-aarch64-string PRIVATE ${TIFLASH_AOR_DIR}/string/include) + +file(GLOB TIFLASH_AARCH64_MATH_FILES ${TIFLASH_AOR_DIR}/math/*.c) +add_library(tiflash-aarch64-math STATIC ${TIFLASH_AARCH64_MATH_FILES}) +target_include_directories(tiflash-aarch64-math PRIVATE ${TIFLASH_AOR_DIR}/math/include) + +# It is reasonable to keep these libraries optimized. +target_compile_options(tiflash-aarch64-string PRIVATE -O3 -g3 -fno-omit-frame-pointer -ffunction-sections -fdata-sections) +target_compile_options(tiflash-aarch64-math PRIVATE -O3 -g3 -fno-omit-frame-pointer -ffunction-sections -fdata-sections)
diff --git a/contrib/arm-optimized-routines-cmake/src/aor.c b/contrib/arm-optimized-routines-cmake/src/aor.c new file mode 100644 index 00000000000..daff1df3c4b --- /dev/null +++ b/contrib/arm-optimized-routines-cmake/src/aor.c @@ -0,0 +1,115 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <asm/hwcap.h> +#include <stdbool.h> +#include <stddef.h> +#include <string.h> +#include <stringlib.h> +#include <sys/auxv.h> + +// Provide default macro definitions in case they are not defined on the current Linux distro. +// For example, TiFlash compiled on an older Linux kernel may also be used on newer ones. +// These values should be stable for Linux: only false negatives are expected when running on +// older kernels, which is acceptable, as `google/cpu_features` does the same. +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#ifndef AT_HWCAP +#define AT_HWCAP 16 +#endif + +/// check if MTE is supported in the current environment +static inline bool mte_supported(void) +{ + return (getauxval(AT_HWCAP2) & HWCAP2_MTE) != 0; +} + +/// check if SVE is supported in the current environment +static inline bool sve_supported(void) +{ + return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0; +} + +#define STRINGIFY_IMPL(X) #X +#define STRINGIFY(X) STRINGIFY_IMPL(X) +/** + * \brief + * Symbols are defined with hidden visibility. Therefore, the implementations here only override routines within the + * TiFlash binary itself. This is because dependencies like `ld.so`, `libgcc_s.so`, etc. need essential routines like + * `memcpy` to finish the early loading procedure, so declaring such symbols as visible indirect functions would create + * cyclic dependencies. It shall be good enough to override symbols within TiFlash, as most heavy computation work + * happens in the main binary.
+ * \param NAME: exported symbol name + * \param SVE: preferred implementation when SVE is available + * \param MTE: preferred implementation when MTE is available + * \param ASIMD: preferred implementation for generic aarch64 targets (ASIMD is required by default for Armv8 and above) + */ +#define DISPATCH(NAME, SVE, MTE, ASIMD) \ + extern typeof(ASIMD) __tiflash_##NAME __attribute__((ifunc(STRINGIFY(__tiflash_##NAME##_resolver)))); \ + extern typeof(ASIMD) NAME __attribute__((visibility("hidden"), alias(STRINGIFY(__tiflash_##NAME)))); \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wunused-function\"") static typeof(ASIMD) * __tiflash_##NAME##_resolver(void) \ + { \ + if (sve_supported()) \ + { \ + return SVE; \ + } \ + if (mte_supported()) \ + { \ + return MTE; \ + } \ + return ASIMD; \ + } \ + _Pragma("GCC diagnostic pop") +#undef memcpy +#undef memmove +#undef memset +#undef memchr +#undef memrchr +#undef memcmp +#undef strcpy +#undef stpcpy +#undef strcmp +#undef strchr +#undef strrchr +#undef strchrnul +#undef strlen +#undef strnlen +#undef strncmp + +DISPATCH(memcpy, __memcpy_aarch64_sve, __memcpy_aarch64_simd, __memcpy_aarch64_simd) +DISPATCH(memmove, __memmove_aarch64_sve, __memmove_aarch64_simd, __memmove_aarch64_simd) +DISPATCH(memset, __memset_aarch64, __memset_aarch64, __memset_aarch64) +DISPATCH(memchr, __memchr_aarch64_sve, __memchr_aarch64_mte, __memchr_aarch64) +DISPATCH(memrchr, __memrchr_aarch64, __memrchr_aarch64, __memrchr_aarch64) +DISPATCH(memcmp, __memcmp_aarch64_sve, __memcmp_aarch64, __memcmp_aarch64) +DISPATCH(strcpy, __strcpy_aarch64_sve, __strcpy_aarch64, __strcpy_aarch64) +DISPATCH(stpcpy, __stpcpy_aarch64_sve, __stpcpy_aarch64, __stpcpy_aarch64) +DISPATCH(strcmp, __strcmp_aarch64_sve, __strcmp_aarch64, __strcmp_aarch64) +DISPATCH(strchr, __strchr_aarch64_sve, __strchr_aarch64_mte, __strchr_aarch64) +DISPATCH(strrchr, __strrchr_aarch64_sve, __strrchr_aarch64_mte, __strrchr_aarch64) +DISPATCH(strchrnul, __strchrnul_aarch64_sve, __strchrnul_aarch64_mte, __strchrnul_aarch64) +DISPATCH(strlen, __strlen_aarch64_sve, __strlen_aarch64_mte, __strlen_aarch64) +DISPATCH(strnlen, __strnlen_aarch64_sve, __strnlen_aarch64, __strnlen_aarch64) +DISPATCH(strncmp, __strncmp_aarch64_sve, __strncmp_aarch64, __strncmp_aarch64) \ No newline at end of file diff --git a/contrib/client-c b/contrib/client-c index 36e05cb0f24..034d1e782cb 160000 --- a/contrib/client-c +++ b/contrib/client-c @@ -1 +1 @@ -Subproject commit 36e05cb0f24c085785abf367176dac2a45bfd67b +Subproject commit 034d1e782cb4697f99b09b679c00dade00f19dd5 diff --git a/contrib/jemalloc b/contrib/jemalloc index ea6b3e973b4..54eaed1d8b5 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit ea6b3e973b477b8061e0076bb257dbd7f3faa756 +Subproject commit 54eaed1d8b56b1aa528be3bdd1877e59c56fa90c diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index ef02fbabc81..91b17eb8ec7 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -4,65 +4,136 @@ endif() set(JEMALLOC_SOURCE_DIR ${TiFlash_SOURCE_DIR}/contrib/jemalloc) -set(SRCS -${JEMALLOC_SOURCE_DIR}/src/arena.c -${JEMALLOC_SOURCE_DIR}/src/background_thread.c -${JEMALLOC_SOURCE_DIR}/src/base.c -${JEMALLOC_SOURCE_DIR}/src/bin.c -${JEMALLOC_SOURCE_DIR}/src/bitmap.c -${JEMALLOC_SOURCE_DIR}/src/ckh.c -${JEMALLOC_SOURCE_DIR}/src/ctl.c -${JEMALLOC_SOURCE_DIR}/src/div.c -${JEMALLOC_SOURCE_DIR}/src/extent.c -${JEMALLOC_SOURCE_DIR}/src/extent_dss.c 
-${JEMALLOC_SOURCE_DIR}/src/extent_mmap.c -${JEMALLOC_SOURCE_DIR}/src/hash.c -${JEMALLOC_SOURCE_DIR}/src/hook.c -${JEMALLOC_SOURCE_DIR}/src/jemalloc.c -${JEMALLOC_SOURCE_DIR}/src/jemalloc_cpp.cpp -${JEMALLOC_SOURCE_DIR}/src/large.c -${JEMALLOC_SOURCE_DIR}/src/log.c -${JEMALLOC_SOURCE_DIR}/src/malloc_io.c -${JEMALLOC_SOURCE_DIR}/src/mutex.c -${JEMALLOC_SOURCE_DIR}/src/mutex_pool.c -${JEMALLOC_SOURCE_DIR}/src/nstime.c -${JEMALLOC_SOURCE_DIR}/src/pages.c -${JEMALLOC_SOURCE_DIR}/src/prng.c -${JEMALLOC_SOURCE_DIR}/src/prof.c -${JEMALLOC_SOURCE_DIR}/src/rtree.c -${JEMALLOC_SOURCE_DIR}/src/sc.c -${JEMALLOC_SOURCE_DIR}/src/stats.c -${JEMALLOC_SOURCE_DIR}/src/sz.c -${JEMALLOC_SOURCE_DIR}/src/tcache.c -${JEMALLOC_SOURCE_DIR}/src/test_hooks.c -${JEMALLOC_SOURCE_DIR}/src/ticker.c -${JEMALLOC_SOURCE_DIR}/src/tsd.c -${JEMALLOC_SOURCE_DIR}/src/witness.c -${JEMALLOC_SOURCE_DIR}/src/safety_check.c +set (SRCS + "${JEMALLOC_SOURCE_DIR}/src/arena.c" + "${JEMALLOC_SOURCE_DIR}/src/background_thread.c" + "${JEMALLOC_SOURCE_DIR}/src/base.c" + "${JEMALLOC_SOURCE_DIR}/src/bin.c" + "${JEMALLOC_SOURCE_DIR}/src/bin_info.c" + "${JEMALLOC_SOURCE_DIR}/src/bitmap.c" + "${JEMALLOC_SOURCE_DIR}/src/buf_writer.c" + "${JEMALLOC_SOURCE_DIR}/src/cache_bin.c" + "${JEMALLOC_SOURCE_DIR}/src/ckh.c" + "${JEMALLOC_SOURCE_DIR}/src/counter.c" + "${JEMALLOC_SOURCE_DIR}/src/ctl.c" + "${JEMALLOC_SOURCE_DIR}/src/decay.c" + "${JEMALLOC_SOURCE_DIR}/src/div.c" + "${JEMALLOC_SOURCE_DIR}/src/ecache.c" + "${JEMALLOC_SOURCE_DIR}/src/edata.c" + "${JEMALLOC_SOURCE_DIR}/src/edata_cache.c" + "${JEMALLOC_SOURCE_DIR}/src/ehooks.c" + "${JEMALLOC_SOURCE_DIR}/src/emap.c" + "${JEMALLOC_SOURCE_DIR}/src/eset.c" + "${JEMALLOC_SOURCE_DIR}/src/exp_grow.c" + "${JEMALLOC_SOURCE_DIR}/src/extent.c" + "${JEMALLOC_SOURCE_DIR}/src/extent_dss.c" + "${JEMALLOC_SOURCE_DIR}/src/extent_mmap.c" + "${JEMALLOC_SOURCE_DIR}/src/fxp.c" + "${JEMALLOC_SOURCE_DIR}/src/hook.c" + "${JEMALLOC_SOURCE_DIR}/src/hpa.c" + "${JEMALLOC_SOURCE_DIR}/src/hpa_hooks.c" + "${JEMALLOC_SOURCE_DIR}/src/hpdata.c" + "${JEMALLOC_SOURCE_DIR}/src/inspect.c" + "${JEMALLOC_SOURCE_DIR}/src/jemalloc.c" + "${JEMALLOC_SOURCE_DIR}/src/large.c" + "${JEMALLOC_SOURCE_DIR}/src/log.c" + "${JEMALLOC_SOURCE_DIR}/src/malloc_io.c" + "${JEMALLOC_SOURCE_DIR}/src/mutex.c" + "${JEMALLOC_SOURCE_DIR}/src/nstime.c" + "${JEMALLOC_SOURCE_DIR}/src/pa.c" + "${JEMALLOC_SOURCE_DIR}/src/pac.c" + "${JEMALLOC_SOURCE_DIR}/src/pa_extra.c" + "${JEMALLOC_SOURCE_DIR}/src/pages.c" + "${JEMALLOC_SOURCE_DIR}/src/pai.c" + "${JEMALLOC_SOURCE_DIR}/src/peak_event.c" + "${JEMALLOC_SOURCE_DIR}/src/prof.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_data.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_log.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_recent.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_stats.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_sys.c" + "${JEMALLOC_SOURCE_DIR}/src/psset.c" + "${JEMALLOC_SOURCE_DIR}/src/rtree.c" + "${JEMALLOC_SOURCE_DIR}/src/safety_check.c" + "${JEMALLOC_SOURCE_DIR}/src/san_bump.c" + "${JEMALLOC_SOURCE_DIR}/src/san.c" + "${JEMALLOC_SOURCE_DIR}/src/sc.c" + "${JEMALLOC_SOURCE_DIR}/src/sec.c" + "${JEMALLOC_SOURCE_DIR}/src/stats.c" + "${JEMALLOC_SOURCE_DIR}/src/sz.c" + "${JEMALLOC_SOURCE_DIR}/src/tcache.c" + "${JEMALLOC_SOURCE_DIR}/src/test_hooks.c" + "${JEMALLOC_SOURCE_DIR}/src/thread_event.c" + "${JEMALLOC_SOURCE_DIR}/src/ticker.c" + "${JEMALLOC_SOURCE_DIR}/src/tsd.c" + "${JEMALLOC_SOURCE_DIR}/src/witness.c" ) if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND SRCS ${JEMALLOC_SOURCE_DIR}/src/zone.c) endif() +if (ARCH_LINUX) + # The ThreadPool selects jobs randomly, and there can be some threads that performed + # a memory-heavy task before and will be inactive for some time, + # but until they become active again, the memory will not be freed, since by + # default each thread has its own arena, and there should be no more than + # 4*CPU arenas (see the opt.narenas description). + # + # By enabling percpu_arena, the number of arenas is limited to the number of CPUs, and hence + # this problem should go away. + # + # muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to + # avoid spurious latencies and additional work associated with + # MADV_DONTNEED. See + # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000") +else() + set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000") +endif() + +message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}") + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") endif () add_library(jemalloc STATIC ${SRCS}) +set (JEMALLOC_INCLUDE_PREFIX) + +if (ARCH_LINUX) + set (JEMALLOC_INCLUDE_PREFIX "include_linux") + target_compile_definitions(jemalloc PRIVATE JEMALLOC_MADV_FREE=8) +elseif (ARCH_FREEBSD) + set (JEMALLOC_INCLUDE_PREFIX "include_freebsd") +elseif (APPLE) + set (JEMALLOC_INCLUDE_PREFIX "include_darwin") +else () + message (FATAL_ERROR "internal jemalloc: This OS is not supported") +endif () -if (ARCH_ARM) - target_include_directories(jemalloc PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include_linux_aarch64) +if (ARCH_AMD64) + if (USE_MUSL) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64_musl") + else() + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64") + endif() +elseif (ARCH_AARCH64) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64") +elseif (ARCH_PPC64LE) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le") +elseif (ARCH_RISCV64) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_riscv64") else () - target_include_directories(jemalloc PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include_linux_x86_64) + message (FATAL_ERROR "internal jemalloc: This arch is not supported") endif () -target_include_directories(jemalloc PRIVATE - ${JEMALLOC_SOURCE_DIR}/include) +configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in + ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h) +target_include_directories(jemalloc SYSTEM PRIVATE + "${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal") + +target_include_directories(jemalloc PUBLIC ${JEMALLOC_SOURCE_DIR}/include ${TiFlash_SOURCE_DIR}/contrib/jemalloc-cmake/include) target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) @@ -80,3 +151,5 @@ if (ENABLE_JEMALLOC_PROF) target_link_libraries (jemalloc PRIVATE ${UNWIND_LIBRARY}) endif () endif () + +target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE)
diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include/jemalloc/internal/jemalloc_preamble.h similarity index 69% rename from contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h rename to contrib/jemalloc-cmake/include/jemalloc/internal/jemalloc_preamble.h index d79551e1f25..45f43a6cd02 100644 ---
a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h +++ b/contrib/jemalloc-cmake/include/jemalloc/internal/jemalloc_preamble.h @@ -1,27 +1,33 @@ #ifndef JEMALLOC_PREAMBLE_H #define JEMALLOC_PREAMBLE_H -#include "jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" +#include "jemalloc_internal_defs.h" -#ifdef JEMALLOC_UTRACE +#if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL) #include <sys/ktrace.h> +#if defined(JEMALLOC_UTRACE) +#define UTRACE_CALL(p, l) utrace(p, l) +#else +#define UTRACE_CALL(p, l) utrace("jemalloc_process", p, l) +#define JEMALLOC_UTRACE +#endif #endif #define JEMALLOC_NO_DEMANGLE #ifdef JEMALLOC_JET -# undef JEMALLOC_IS_MALLOC -# define JEMALLOC_N(n) jet_##n -# include "jemalloc/internal/public_namespace.h" -# define JEMALLOC_NO_RENAME -# include "jemalloc/jemalloc.h" -# undef JEMALLOC_NO_RENAME +#undef JEMALLOC_IS_MALLOC +#define JEMALLOC_N(n) jet_##n +#include "jemalloc/internal/public_namespace.h" +#define JEMALLOC_NO_RENAME +#include "jemalloc/jemalloc.h" +#undef JEMALLOC_NO_RENAME #else -# define JEMALLOC_N(n) je_##n -# include "jemalloc/jemalloc.h" +#define JEMALLOC_N(n) je_##n +#include "jemalloc/jemalloc.h" #endif -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#if defined(JEMALLOC_OSATOMIC) #include <libkern/OSAtomic.h> #endif @@ -39,16 +45,16 @@ * possible. */ #ifndef JEMALLOC_NO_PRIVATE_NAMESPACE -# ifndef JEMALLOC_JET -# include "jemalloc/internal/private_namespace.h" -# else -# include "jemalloc/internal/private_namespace_jet.h" -# endif +#ifndef JEMALLOC_JET +#include "jemalloc/internal/private_namespace.h" +#else +#include "jemalloc/internal/private_namespace_jet.h" +#endif #endif #include "jemalloc/internal/test_hooks.h" #ifdef JEMALLOC_DEFINE_MADVISE_FREE -# define JEMALLOC_MADV_FREE 8 +#define JEMALLOC_MADV_FREE 8 #endif static const bool config_debug = @@ -161,7 +167,55 @@ static const bool config_log = false #endif ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. + */ + true +#else + false +#endif + ; + +/* + * Extra debugging of sized deallocations too onerous to be included in the + * general safety checks. + */ +static const bool config_opt_size_checks = +#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_DEBUG) + true +#else + false +#endif + ; + +static const bool config_uaf_detection = +#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) + true +#else + false +#endif + ; + +/* Whether or not the C++ extensions are enabled. */ +static const bool config_enable_cxx = +#ifdef JEMALLOC_ENABLE_CXX + true +#else + false +#endif + ; + +#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA #endif @@ -190,23 +244,16 @@ static const bool have_background_thread = false #endif ; - -#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 -#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 - -/* - * Are extra safety checks enabled; things like checking the size of sized - * deallocations, double-frees, etc.
- */ -static const bool config_opt_safety_checks = -#ifdef JEMALLOC_OPT_SAFETY_CHECKS +static const bool config_high_res_timer = +#ifdef JEMALLOC_HAVE_CLOCK_REALTIME true -#elif defined(JEMALLOC_DEBUG) - /* - * This lets us only guard safety checks by one flag instead of two; fast - * checks can guard solely by config_opt_safety_checks and run in debug mode - * too. - */ +#else + false +#endif + ; + +static const bool have_memcntl = +#ifdef JEMALLOC_HAVE_MEMCNTL true #else false
diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h index d06243c5239..e90fa892100 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h @@ -4,13 +4,21 @@ extern "C" { #endif +#if !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif + #include -#include #include #include +#include #include +#if !defined(__clang__) +#pragma GCC diagnostic pop +#endif + #ifdef __cplusplus } #endif -
diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h similarity index 67% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h index d1389237a77..1fc77be57cf 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h @@ -5,15 +5,29 @@ /* Defined if alloc_size attribute is supported. */ #define JEMALLOC_HAVE_ATTR_ALLOC_SIZE +/* Defined if format_arg(...) attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FORMAT_ARG + +/* Defined if format(gnu_printf, ...) attribute is supported. */ +/* #undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF */ + /* Defined if format(printf, ...) attribute is supported. */ #define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF +/* Defined if fallthrough attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FALLTHROUGH + +/* Defined if cold attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_COLD + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. */ +#if !defined(USE_MUSL) #define JEMALLOC_OVERRIDE_MEMALIGN #define JEMALLOC_OVERRIDE_VALLOC +#endif /* * At least Linux omits the "const" in: @@ -32,11 +46,11 @@ #define JEMALLOC_USE_CXX_THROW #ifdef _MSC_VER -# ifdef _WIN64 -# define LG_SIZEOF_PTR_WIN 3 -# else -# define LG_SIZEOF_PTR_WIN 2 -# endif +#ifdef _WIN64 +#define LG_SIZEOF_PTR_WIN 3 +#else +#define LG_SIZEOF_PTR_WIN 2 +#endif #endif /* sizeof(void *) == 2^LG_SIZEOF_PTR. */
diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h new file mode 100644 index 00000000000..ccb22470e64 --- /dev/null +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h @@ -0,0 +1,148 @@ +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> +#include <limits.h> +#include <strings.h> + +#define JEMALLOC_VERSION "5.3-RC" +#define JEMALLOC_VERSION_MAJOR 5 +#define JEMALLOC_VERSION_MINOR 3 +#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_NREV 0 +#define JEMALLOC_VERSION_GID "ca709c3139f77f4c00a903cdee46d71e9028f6c6" +#define JEMALLOC_VERSION_GID_IDENT ca709c3139f77f4c00a903cdee46d71e9028f6c6 + +#define MALLOCX_LG_ALIGN(la) ((int)(la)) +#if LG_SIZEOF_PTR == 2 +#define MALLOCX_ALIGN(a) ((int)(ffs((int)(a)) - 1)) +#else +#define MALLOCX_ALIGN(a) \ + ((int)(((size_t)(a) < (size_t)INT_MAX) ?
ffs((int)(a)) - 1 : ffs((int)(((size_t)(a)) >> 32)) + 31)) +#endif +#define MALLOCX_ZERO ((int)0x40) +/* + * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 + * encodes MALLOCX_TCACHE_NONE. + */ +#define MALLOCX_TCACHE(tc) ((int)(((tc) + 2) << 8)) +#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +/* + * Bias arena index bits so that 0 encodes "use an automatically chosen arena". + */ +#define MALLOCX_ARENA(a) ((((int)(a)) + 1) << 20) + +/* + * Use as arena index in "arena..{purge,decay,dss}" and + * "stats.arenas..*" mallctl interfaces to select all arenas. This + * definition is intentionally specified in raw decimal format to support + * cpp-based string concatenation, e.g. + * + * #define STRINGIFY_HELPER(x) #x + * #define STRINGIFY(x) STRINGIFY_HELPER(x) + * + * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, + * 0); + */ +#define MALLCTL_ARENAS_ALL 4096 +/* + * Use as arena index in "stats.arenas..*" mallctl interfaces to select + * destroyed arenas. + */ +#define MALLCTL_ARENAS_DESTROYED 4097 + +#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) +#define JEMALLOC_CXX_THROW throw() +#else +#define JEMALLOC_CXX_THROW +#endif + +#if defined(_MSC_VER) +#define JEMALLOC_ATTR(s) +#define JEMALLOC_ALIGNED(s) __declspec(align(s)) +#define JEMALLOC_ALLOC_SIZE(s) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) +#ifndef JEMALLOC_EXPORT +#ifdef DLLEXPORT +#define JEMALLOC_EXPORT __declspec(dllexport) +#else +#define JEMALLOC_EXPORT __declspec(dllimport) +#endif +#endif +#define JEMALLOC_FORMAT_ARG(i) +#define JEMALLOC_FORMAT_PRINTF(s, i) +#define JEMALLOC_FALLTHROUGH +#define JEMALLOC_NOINLINE __declspec(noinline) +#ifdef __cplusplus +#define JEMALLOC_NOTHROW __declspec(nothrow) +#else +#define JEMALLOC_NOTHROW +#endif +#define JEMALLOC_SECTION(s) __declspec(allocate(s)) +#define JEMALLOC_RESTRICT_RETURN __declspec(restrict) +#if _MSC_VER >= 1900 && !defined(__EDG__) +#define JEMALLOC_ALLOCATOR __declspec(allocator) +#else +#define JEMALLOC_ALLOCATOR +#endif +#define JEMALLOC_COLD +#elif defined(JEMALLOC_HAVE_ATTR) +#define JEMALLOC_ATTR(s) __attribute__((s)) +#define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +#ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +#define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +#else +#define JEMALLOC_ALLOC_SIZE(s) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) +#endif +#ifndef JEMALLOC_EXPORT +#define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +#endif +#ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +#define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +#else +#define JEMALLOC_FORMAT_ARG(i) +#endif +#ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +#define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +#elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +#define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +#else +#define JEMALLOC_FORMAT_PRINTF(s, i) +#endif +#ifdef JEMALLOC_HAVE_ATTR_FALLTHROUGH +#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough) +#else +#define JEMALLOC_FALLTHROUGH +#endif +#define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +#define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +#define JEMALLOC_RESTRICT_RETURN +#define JEMALLOC_ALLOCATOR +#ifdef JEMALLOC_HAVE_ATTR_COLD +#define JEMALLOC_COLD JEMALLOC_ATTR(__cold__) +#else +#define JEMALLOC_COLD +#endif +#else +#define JEMALLOC_ATTR(s) +#define JEMALLOC_ALIGNED(s) +#define 
JEMALLOC_ALLOC_SIZE(s) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) +#define JEMALLOC_EXPORT +#define JEMALLOC_FORMAT_PRINTF(s, i) +#define JEMALLOC_FALLTHROUGH +#define JEMALLOC_NOINLINE +#define JEMALLOC_NOTHROW +#define JEMALLOC_SECTION(s) +#define JEMALLOC_RESTRICT_RETURN +#define JEMALLOC_ALLOCATOR +#define JEMALLOC_COLD +#endif + +#if (defined(__APPLE__) || defined(__FreeBSD__)) && !defined(JEMALLOC_NO_RENAME) +#define JEMALLOC_SYS_NOTHROW +#else +#define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW +#endif diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h new file mode 100644 index 00000000000..31f72d3a2af --- /dev/null +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h @@ -0,0 +1,86 @@ +// OSX does not have this for system alloc functions, so you will get +// "exception specification in declaration" error. +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(USE_MUSL) +#undef JEMALLOC_NOTHROW +#define JEMALLOC_NOTHROW + +#undef JEMALLOC_SYS_NOTHROW +#define JEMALLOC_SYS_NOTHROW + +#undef JEMALLOC_CXX_THROW +#define JEMALLOC_CXX_THROW +#endif + +#include "jemalloc_rename.h" + +/* + * The je_ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). + */ +extern JEMALLOC_EXPORT const char * je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void * cbopaque, + const char * s); + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_malloc(size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_calloc(size_t num, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); +JEMALLOC_EXPORT int JEMALLOC_SYS_NOTHROW je_posix_memalign( + void ** memptr, + size_t alignment, + size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_aligned_alloc(size_t alignment, + size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) + JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_realloc(void * ptr, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW je_free(void * ptr) + JEMALLOC_CXX_THROW; + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * je_mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * je_rallocx(void * ptr, size_t size, int flags) JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void * ptr, size_t size, size_t extra, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void * ptr, + int flags) JEMALLOC_ATTR(pure); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void * ptr, int flags); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void * ptr, size_t size, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); + +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char * name, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char * 
name, + size_t * mibp, + size_t * miblenp); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t * mib, + size_t miblen, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( + void (*write_cb)(void *, const char *), + void * je_cbopaque, + const char * opts); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void * ptr) JEMALLOC_CXX_THROW; +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_size( + const void * ptr); +#endif + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_memalign(size_t alignment, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_valloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos_jet.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos_jet.h new file mode 100644 index 00000000000..195d57e2997 --- /dev/null +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos_jet.h @@ -0,0 +1,71 @@ +/* + * The jet_ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle@install_suffix@.h). + */ +extern JEMALLOC_EXPORT const char * jet_malloc_conf; +extern JEMALLOC_EXPORT void (*jet_malloc_message)(void * cbopaque, + const char * s); + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_malloc(size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_calloc(size_t num, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); +JEMALLOC_EXPORT int JEMALLOC_SYS_NOTHROW jet_posix_memalign( + void ** memptr, + size_t alignment, + size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_aligned_alloc(size_t alignment, + size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) + JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_realloc(void * ptr, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW jet_free(void * ptr) + JEMALLOC_CXX_THROW; + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * jet_mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * jet_rallocx(void * ptr, size_t size, int flags) JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_xallocx(void * ptr, size_t size, size_t extra, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_sallocx(const void * ptr, + int flags) JEMALLOC_ATTR(pure); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW jet_dallocx(void * ptr, int flags); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW jet_sdallocx(void * ptr, size_t size, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); + +JEMALLOC_EXPORT int JEMALLOC_NOTHROW jet_mallctl(const 
char * name, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW jet_mallctlnametomib(const char * name, + size_t * mibp, + size_t * miblenp); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW jet_mallctlbymib(const size_t * mib, + size_t miblen, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW jet_malloc_stats_print( + void (*write_cb)(void *, const char *), + void * jet_cbopaque, + const char * opts); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void * ptr) JEMALLOC_CXX_THROW; +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_malloc_size( + const void * ptr); +#endif + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_memalign(size_t alignment, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_valloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h index a2ea2dd3533..d032d46752d 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h @@ -4,26 +4,28 @@ * these macro definitions. */ #ifndef JEMALLOC_NO_RENAME -# define je_aligned_alloc aligned_alloc -# define je_calloc calloc -# define je_dallocx dallocx -# define je_free free -# define je_mallctl mallctl -# define je_mallctlbymib mallctlbymib -# define je_mallctlnametomib mallctlnametomib -# define je_malloc malloc -# define je_malloc_conf malloc_conf -# define je_malloc_message malloc_message -# define je_malloc_stats_print malloc_stats_print -# define je_malloc_usable_size malloc_usable_size -# define je_mallocx mallocx -# define je_nallocx nallocx -# define je_posix_memalign posix_memalign -# define je_rallocx rallocx -# define je_realloc realloc -# define je_sallocx sallocx -# define je_sdallocx sdallocx -# define je_xallocx xallocx -# define je_memalign memalign -# define je_valloc valloc +#define je_aligned_alloc aligned_alloc +#define je_calloc calloc +#define je_dallocx dallocx +#define je_free free +#define je_mallctl mallctl +#define je_mallctlbymib mallctlbymib +#define je_mallctlnametomib mallctlnametomib +#define je_malloc malloc +#define je_malloc_conf malloc_conf +#define je_malloc_conf_2_conf_harder malloc_conf_2_conf_harder +#define je_malloc_message malloc_message +#define je_malloc_stats_print malloc_stats_print +#define je_malloc_usable_size malloc_usable_size +#define je_mallocx mallocx +#define je_smallocx_ca709c3139f77f4c00a903cdee46d71e9028f6c6 smallocx_ca709c3139f77f4c00a903cdee46d71e9028f6c6 +#define je_nallocx nallocx +#define je_posix_memalign posix_memalign +#define je_rallocx rallocx +#define je_realloc realloc +#define je_sallocx sallocx +#define je_sdallocx sdallocx +#define je_xallocx xallocx +#define je_memalign memalign +#define je_valloc valloc #endif diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h similarity index 57% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h index 
1a58874306e..eeaf7a6760e 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h @@ -5,73 +5,66 @@ typedef struct extent_hooks_s extent_hooks_t; * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); */ -typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, - bool *, unsigned); +typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, bool *, unsigned); /* * bool * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, * bool committed, unsigned arena_ind); */ -typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); +typedef bool(extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, unsigned); /* * void * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, * bool committed, unsigned arena_ind); */ -typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); +typedef void(extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, unsigned); /* * bool * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); +typedef bool(extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, - size_t, unsigned); +typedef bool(extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); +typedef bool(extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - bool, unsigned); +typedef bool(extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, bool, unsigned); /* * bool * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, - bool, unsigned); +typedef bool(extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, bool, unsigned); -struct extent_hooks_s { - extent_alloc_t *alloc; - extent_dalloc_t *dalloc; - extent_destroy_t *destroy; - extent_commit_t *commit; - extent_decommit_t *decommit; - extent_purge_t *purge_lazy; - extent_purge_t *purge_forced; - extent_split_t *split; - extent_merge_t *merge; +struct extent_hooks_s +{ + extent_alloc_t * alloc; + extent_dalloc_t * dalloc; + extent_destroy_t * destroy; + extent_commit_t * commit; + extent_decommit_t * decommit; + extent_purge_t * purge_lazy; + extent_purge_t * purge_forced; + extent_split_t * split; + extent_merge_t * merge; }; diff --git a/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in 
b/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..8ad95c51560 --- /dev/null +++ b/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,425 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +#define JEMALLOC_PREFIX "je_" +#define JEMALLOC_CPREFIX "JE_" + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 64 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#define JEMALLOC_OS_UNFAIR_LOCK + +/* Defined if syscall(2) is usable. */ +/* #undef JEMALLOC_USE_SYSCALL */ + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */ + +/* + * Defined if mach_absolute_time() is available. 
+ */ +#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +/* #undef JEMALLOC_DSS */ + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 14 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. 
This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +/* #undef JEMALLOC_TLS */ + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#define JEMALLOC_ZONE + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. 
+ * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +#define JEMALLOC_HAVE_MALLOC_SIZE + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */ + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +/* #undef JEMALLOC_BACKGROUND_THREAD */ + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +/* #undef JEMALLOC_IS_MALLOC */ + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +#define JEMALLOC_HAVE_VM_MAKE_TAG + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..8671da5db69 --- /dev/null +++ b/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,425 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +#define JEMALLOC_PREFIX "je_" +#define JEMALLOC_CPREFIX "JE_" + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. 
+ */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 1 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#define JEMALLOC_OS_UNFAIR_LOCK + +/* Defined if syscall(2) is usable. */ +/* #undef JEMALLOC_USE_SYSCALL */ + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */ + +/* + * Defined if mach_absolute_time() is available. + */ +#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. 
+ */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +/* #undef JEMALLOC_DSS */ + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +/* #undef JEMALLOC_TLS */ + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. 
+ */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#define JEMALLOC_ZONE + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +#define JEMALLOC_HAVE_MALLOC_SIZE + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */ + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +/* #undef JEMALLOC_BACKGROUND_THREAD */ + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +/* #undef JEMALLOC_IS_MALLOC */ + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +#define JEMALLOC_HAVE_VM_MAKE_TAG + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..0f61417d65f --- /dev/null +++ b/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. 
*/ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Only since 12.1-STABLE */ +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Only since 12.1-STABLE */ +/* Defined if pthread_getname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP */ + +/* Defined if pthread_get_name_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GET_NAME_NP + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#define JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#define JEMALLOC_MUTEX_INIT_CB + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. 
*/ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#define JEMALLOC_LAZY_LOCK + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 16 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 29 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. 
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#define JEMALLOC_SYSCTL_VM_OVERCOMMIT +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_NOCORE + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). 
+ */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..32cad025f5f --- /dev/null +++ b/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 1 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. 
+ */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Only since 12.1-STABLE */ +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Only since 12.1-STABLE */ +/* Defined if pthread_getname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP */ + +/* Defined if pthread_get_name_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GET_NAME_NP + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#define JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#define JEMALLOC_MUTEX_INIT_CB + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. 
*/ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#define JEMALLOC_LAZY_LOCK + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#define JEMALLOC_SYSCTL_VM_OVERCOMMIT +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. 
+ * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_NOCORE + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. 
*/ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/README b/contrib/jemalloc-cmake/include_linux_aarch64/README deleted file mode 100644 index 2ab582803a2..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/README +++ /dev/null @@ -1,7 +0,0 @@ -Here are pre-generated files from jemalloc on Linux aarch64. -You can obtain these files by running ./autogen.sh inside jemalloc source directory. - -Added #define GNU_SOURCE -Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not. -Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard. -Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5. diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in similarity index 80% rename from contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h rename to contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in index 5e598348e72..ad535e6d773 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,12 +1,6 @@ /* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ #ifndef JEMALLOC_INTERNAL_DEFS_H_ #define JEMALLOC_INTERNAL_DEFS_H_ - - -#ifndef _GNU_SOURCE - #define _GNU_SOURCE -#endif - /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -19,13 +13,15 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. */ -#define JEMALLOC_OVERRIDE___LIBC_CALLOC -#define JEMALLOC_OVERRIDE___LIBC_FREE -#define JEMALLOC_OVERRIDE___LIBC_MALLOC -#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN -#define JEMALLOC_OVERRIDE___LIBC_REALLOC -#define JEMALLOC_OVERRIDE___LIBC_VALLOC -#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -51,29 +47,17 @@ #define LG_VADDR 48 /* Defined if C11 atomics are available. */ -#define JEMALLOC_C11_ATOMICS 1 +#define JEMALLOC_C11_ATOMICS /* Defined if GCC __atomic atomics are available. */ -#define JEMALLOC_GCC_ATOMIC_ATOMICS 1 +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS /* Defined if GCC __sync atomics are available. */ -#define JEMALLOC_GCC_SYNC_ATOMICS 1 - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). 
- */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS /* * Defined if __builtin_clz() and __builtin_clzl() are available. @@ -85,19 +69,13 @@ */ /* #undef JEMALLOC_OS_UNFAIR_LOCK */ -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if syscall(2) is usable. */ #define JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. */ -#define JEMALLOC_HAVE_SECURE_GETENV +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ /* * Defined if issetugid(2) is available. @@ -110,21 +88,32 @@ /* Defined if pthread_setname_np(3) is available. */ #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC /* * Defined if mach_absolute_time() is available. */ /* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -187,6 +176,9 @@ /* Support utrace(2)-based tracing. */ /* #undef JEMALLOC_UTRACE */ +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + /* Support optional abort() on OOM. */ /* #undef JEMALLOC_XMALLOC */ @@ -202,6 +194,9 @@ /* One page is 2^LG_PAGE bytes. */ #define LG_PAGE 16 +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + /* * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the * system does not explicitly support huge pages; system calls that require @@ -243,6 +238,12 @@ #define JEMALLOC_INTERNAL_FFSL __builtin_ffsl #define JEMALLOC_INTERNAL_FFS __builtin_ffs +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. @@ -297,7 +298,7 @@ * MADV_FREE, though typically with higher * system overhead. */ -// #define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED #define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS @@ -309,17 +310,46 @@ */ #define JEMALLOC_MADVISE_DONTDUMP +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. 
*/ +#define JEMALLOC_HAVE_MPROTECT + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. */ /* #undef JEMALLOC_THP */ +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + /* Define if operating system has alloca.h header. */ -#define JEMALLOC_HAS_ALLOCA_H 1 +#define JEMALLOC_HAS_ALLOCA_H /* C99 restrict keyword supported. */ -#define JEMALLOC_HAS_RESTRICT 1 +#define JEMALLOC_HAS_RESTRICT /* For use by hash code. */ /* #undef JEMALLOC_BIG_ENDIAN */ @@ -360,7 +390,7 @@ /* * If defined, all the features necessary for background threads are present. */ -#define JEMALLOC_BACKGROUND_THREAD 1 +#define JEMALLOC_BACKGROUND_THREAD /* * If defined, jemalloc symbols are not exported (doesn't work when @@ -369,20 +399,29 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "" +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ -#define JEMALLOC_IS_MALLOC 1 +#define JEMALLOC_IS_MALLOC /* * Defined if strerror_r returns char * if _GNU_SOURCE is defined. */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE -/* - * popcount*() functions to use for bitmapping. - */ -#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl -#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h deleted file mode 100644 index d1389237a77..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h +++ /dev/null @@ -1,43 +0,0 @@ -/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ -/* Defined if __attribute__((...)) syntax is supported. */ -#define JEMALLOC_HAVE_ATTR - -/* Defined if alloc_size attribute is supported. */ -#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE - -/* Defined if format(printf, ...) attribute is supported. */ -#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF - -/* - * Define overrides for non-standard allocator-related functions if they are - * present on the system. - */ -#define JEMALLOC_OVERRIDE_MEMALIGN -#define JEMALLOC_OVERRIDE_VALLOC - -/* - * At least Linux omits the "const" in: - * - * size_t malloc_usable_size(const void *ptr); - * - * Match the operating system's prototype. - */ -#define JEMALLOC_USABLE_SIZE_CONST - -/* - * If defined, specify throw() for the public function prototypes when compiling - * with C++. 
The only justification for this is to match the prototypes that - * glibc defines. - */ -#define JEMALLOC_USE_CXX_THROW - -#ifdef _MSC_VER -# ifdef _WIN64 -# define LG_SIZEOF_PTR_WIN 3 -# else -# define LG_SIZEOF_PTR_WIN 2 -# endif -#endif - -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 3 diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h deleted file mode 100644 index 34235894285..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include -#include -#include -#include - -#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 2 -#define JEMALLOC_VERSION_BUGFIX 1 -#define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 - -#define MALLOCX_LG_ALIGN(la) ((int)(la)) -#if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) -#else -# define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) -#endif -#define MALLOCX_ZERO ((int)0x40) -/* - * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 - * encodes MALLOCX_TCACHE_NONE. - */ -#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) -/* - * Bias arena index bits so that 0 encodes "use an automatically chosen arena". - */ -#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) - -/* - * Use as arena index in "arena..{purge,decay,dss}" and - * "stats.arenas..*" mallctl interfaces to select all arenas. This - * definition is intentionally specified in raw decimal format to support - * cpp-based string concatenation, e.g. - * - * #define STRINGIFY_HELPER(x) #x - * #define STRINGIFY(x) STRINGIFY_HELPER(x) - * - * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, - * 0); - */ -#define MALLCTL_ARENAS_ALL 4096 -/* - * Use as arena index in "stats.arenas..*" mallctl interfaces to select - * destroyed arenas. 
- */ -#define MALLCTL_ARENAS_DESTROYED 4097 - -#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) -# define JEMALLOC_CXX_THROW throw() -#else -# define JEMALLOC_CXX_THROW -#endif - -#if defined(_MSC_VER) -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# ifndef JEMALLOC_EXPORT -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# endif -# define JEMALLOC_FORMAT_ARG(i) -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE __declspec(noinline) -# ifdef __cplusplus -# define JEMALLOC_NOTHROW __declspec(nothrow) -# else -# define JEMALLOC_NOTHROW -# endif -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) -# if _MSC_VER >= 1900 && !defined(__EDG__) -# define JEMALLOC_ALLOCATOR __declspec(allocator) -# else -# define JEMALLOC_ALLOCATOR -# endif -#elif defined(JEMALLOC_HAVE_ATTR) -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG -# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) -# else -# define JEMALLOC_FORMAT_ARG(i) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# define JEMALLOC_EXPORT -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE -# define JEMALLOC_NOTHROW -# define JEMALLOC_SECTION(s) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h deleted file mode 100644 index ff025e30fa7..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * The je_ prefix on the following public symbol declarations is an artifact - * of namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). 
- */ -extern JEMALLOC_EXPORT const char *je_malloc_conf; -extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, - const char *s); - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_malloc(size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_calloc(size_t num, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_posix_memalign(void **memptr, - size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_aligned_alloc(size_t alignment, - size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) - JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_realloc(void *ptr, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) - JEMALLOC_CXX_THROW; - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_mallocx(size_t size, int flags) - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_rallocx(void *ptr, size_t size, - int flags) JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, - size_t extra, int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void *ptr, - int flags) JEMALLOC_ATTR(pure); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, - int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) - JEMALLOC_ATTR(pure); - -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, - size_t *mibp, size_t *miblenp); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( - void (*write_cb)(void *, const char *), void *je_cbopaque, - const char *opts); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW - JEMALLOC_ATTR(malloc); -#endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h deleted file mode 100644 index 1a58874306e..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h +++ /dev/null @@ -1,77 +0,0 @@ -typedef struct extent_hooks_s extent_hooks_t; - -/* - * void * - * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); - */ -typedef void 
*(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, - bool *, unsigned); - -/* - * bool - * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, - * bool committed, unsigned arena_ind); - */ -typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); - -/* - * void - * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, - * bool committed, unsigned arena_ind); - */ -typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); - -/* - * bool - * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); - -/* - * bool - * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, - size_t, unsigned); - -/* - * bool - * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); - -/* - * bool - * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); - */ -typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - bool, unsigned); - -/* - * bool - * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); - */ -typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, - bool, unsigned); - -struct extent_hooks_s { - extent_alloc_t *alloc; - extent_dalloc_t *dalloc; - extent_destroy_t *destroy; - extent_commit_t *commit; - extent_decommit_t *decommit; - extent_purge_t *purge_lazy; - extent_purge_t *purge_forced; - extent_split_t *split; - extent_merge_t *merge; -}; diff --git a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..12890f80ef1 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. 
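
The extent_hooks_s table defined in the typedefs header above is installed per arena through the mallctl interface. A minimal sketch that wraps only the alloc hook, assuming the standard public API; my_extent_alloc and arena index 0 are illustrative:

#include <stdbool.h>
#include <stddef.h>
#include <jemalloc/jemalloc.h>

static extent_hooks_t *orig_hooks;
static extent_hooks_t  my_hooks;

static void *
my_extent_alloc(extent_hooks_t *hooks, void *new_addr, size_t size,
                size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
    /* Custom bookkeeping could go here; delegate to the saved defaults. */
    return orig_hooks->alloc(orig_hooks, new_addr, size, alignment, zero,
                             commit, arena_ind);
}

int install_hooks(void) {
    extent_hooks_t *new_hooks = &my_hooks;
    size_t sz = sizeof(orig_hooks);
    /* Read the current table first so the wrapper can fall back to it. */
    if (mallctl("arena.0.extent_hooks", &orig_hooks, &sz, NULL, 0) != 0)
        return 1;
    my_hooks = *orig_hooks;
    my_hooks.alloc = my_extent_alloc;
    return mallctl("arena.0.extent_hooks", NULL, NULL, &new_hooks,
                   sizeof(new_hooks));
}
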
+ * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 64 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_ATFORK */ + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. 
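
JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE above selects Linux's cheap, tick-granularity clock for the allocator's internal timing. A small sketch of the Linux-specific call it enables:

#include <stdio.h>
#include <time.h>

int main(void) {
    struct timespec ts;
    /* CLOCK_MONOTONIC_COARSE trades resolution for a much cheaper read. */
    if (clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == 0)
        printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
    return 0;
}
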
*/ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 16 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. 
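
The LG_* constants above are base-2 logarithms, so the values in this ppc64le configuration work out as follows (a worked sketch):

#include <stdio.h>

int main(void) {
    printf("page size: %zu bytes\n", (size_t)1 << 16); /* LG_PAGE 16 -> 64 KiB */
    printf("huge page: %zu bytes\n", (size_t)1 << 21); /* LG_HUGEPAGE 21 -> 2 MiB */
    return 0;
}
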
We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +#define JEMALLOC_DEFINE_MADVISE_FREE + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. 
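
A sketch of the two purge paths named above, assuming a Linux target (MADV_FREE needs kernel 4.5+; the wrapper names are illustrative):

#include <stddef.h>
#include <sys/mman.h>

/* Lazy purge: the mapping survives and pages are reclaimed under pressure. */
static int purge_lazy(void *addr, size_t len) {
    return madvise(addr, len, MADV_FREE);
}

/* Forced purge: pages are dropped now and demand-zeroed on the next touch. */
static int purge_forced(void *addr, size_t len) {
    return madvise(addr, len, MADV_DONTNEED);
}
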
*/ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..ad535e6d773 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. 
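
The "@JEMALLOC_CONFIG_MALLOC_CONF@" placeholder above is substituted by the build system; applications can layer their own defaults through the documented malloc_conf symbol. A sketch, with illustrative option values:

#include <jemalloc/jemalloc.h>

/* Read once by jemalloc during bootstrap. */
const char *malloc_conf = "background_thread:true,narenas:4";
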
*/ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). 
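
JEMALLOC_HAVE_BUILTIN_CLZ above lets size-class math run in constant time. A worked example of the round-up-to-power-of-two idiom it supports:

#include <stdio.h>

static unsigned pow2_ceil(unsigned x) {
    /* For x > 1, 32 - clz(x - 1) is the exponent of the next power of two. */
    return (x <= 1) ? 1 : 1u << (32 - __builtin_clz(x - 1));
}

int main(void) {
    printf("%u\n", pow2_ceil(3000)); /* -> 4096 */
    return 0;
}
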
*/ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 16 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 29 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. 
+ * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? 
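
The LG_SIZEOF_* entries above encode type sizes as base-2 logs; a sketch verifying them on an LP64 target such as this riscv64 configuration, using C11 static_assert:

#include <assert.h>
#include <stdint.h>

static_assert(sizeof(int)       == (1 << 2), "LG_SIZEOF_INT 2");
static_assert(sizeof(long)      == (1 << 3), "LG_SIZEOF_LONG 3");
static_assert(sizeof(long long) == (1 << 3), "LG_SIZEOF_LONG_LONG 3");
static_assert(sizeof(intmax_t)  == (1 << 3), "LG_SIZEOF_INTMAX_T 3");
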
*/ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/README b/contrib/jemalloc-cmake/include_linux_x86_64/README deleted file mode 100644 index bf7663bda8d..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/README +++ /dev/null @@ -1,7 +0,0 @@ -Here are pre-generated files from jemalloc on Linux x86_64. -You can obtain these files by running ./autogen.sh inside jemalloc source directory. - -Added #define GNU_SOURCE -Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not. -Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard. -Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5. diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in similarity index 78% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h rename to contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index 7c21fa79397..99ab2d53ca9 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,11 +1,6 @@ /* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ #ifndef JEMALLOC_INTERNAL_DEFS_H_ #define JEMALLOC_INTERNAL_DEFS_H_ - -#ifndef _GNU_SOURCE - #define _GNU_SOURCE -#endif - /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -18,13 +13,15 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. */ -#define JEMALLOC_OVERRIDE___LIBC_CALLOC -#define JEMALLOC_OVERRIDE___LIBC_FREE -#define JEMALLOC_OVERRIDE___LIBC_MALLOC -#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN -#define JEMALLOC_OVERRIDE___LIBC_REALLOC -#define JEMALLOC_OVERRIDE___LIBC_VALLOC -#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -50,29 +47,17 @@ #define LG_VADDR 48 /* Defined if C11 atomics are available. */ -#define JEMALLOC_C11_ATOMICS 1 +#define JEMALLOC_C11_ATOMICS /* Defined if GCC __atomic atomics are available. */ -#define JEMALLOC_GCC_ATOMIC_ATOMICS 1 +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS /* Defined if GCC __sync atomics are available. 
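
The JEMALLOC_GCC_*_ATOMICS probes in this hunk pick between the modern __atomic builtins and the legacy __sync family. For comparison, both spellings of the same fetch-add:

#include <stdint.h>

uint32_t bump_atomic(uint32_t *p) {
    return __atomic_fetch_add(p, 1, __ATOMIC_RELAXED);
}

uint32_t bump_sync(uint32_t *p) {
    return __sync_fetch_and_add(p, 1);
}
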
*/ -#define JEMALLOC_GCC_SYNC_ATOMICS 1 - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS /* * Defined if __builtin_clz() and __builtin_clzl() are available. @@ -84,20 +69,13 @@ */ /* #undef JEMALLOC_OS_UNFAIR_LOCK */ -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if syscall(2) is usable. */ #define JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. */ -// Don't want dependency on newer GLIBC -//#define JEMALLOC_HAVE_SECURE_GETENV +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ /* * Defined if issetugid(2) is available. @@ -110,21 +88,32 @@ /* Defined if pthread_setname_np(3) is available. */ #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC /* * Defined if mach_absolute_time() is available. */ /* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -160,6 +149,9 @@ /* JEMALLOC_STATS enables statistics calculation. */ #define JEMALLOC_STATS +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + /* JEMALLOC_PROF enables allocation profiling. */ /* #undef JEMALLOC_PROF */ @@ -184,6 +176,9 @@ /* Support utrace(2)-based tracing. */ /* #undef JEMALLOC_UTRACE */ +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + /* Support optional abort() on OOM. */ /* #undef JEMALLOC_XMALLOC */ @@ -199,6 +194,9 @@ /* One page is 2^LG_PAGE bytes. */ #define LG_PAGE 12 +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + /* * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the * system does not explicitly support huge pages; system calls that require @@ -240,6 +238,12 @@ #define JEMALLOC_INTERNAL_FFSL __builtin_ffsl #define JEMALLOC_INTERNAL_FFS __builtin_ffs +/* + * popcount*() functions to use for bitmapping. 
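
The ffs/popcount builtins wired up above drive jemalloc's bitmaps. A small worked example, with inputs chosen arbitrarily:

#include <stdio.h>

int main(void) {
    /* __builtin_ffs: 1-based index of the least significant set bit (0 if none). */
    printf("%d\n", __builtin_ffs(0x18));         /* 0b11000 -> 4 */
    /* __builtin_popcountl: number of set bits. */
    printf("%d\n", __builtin_popcountl(0xF0UL)); /* -> 4 */
    return 0;
}
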
+ */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. @@ -252,6 +256,12 @@ */ /* #undef JEMALLOC_LOG */ +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ @@ -288,7 +298,7 @@ * MADV_FREE, though typically with higher * system overhead. */ -//#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED #define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS @@ -300,17 +310,46 @@ */ #define JEMALLOC_MADVISE_DONTDUMP +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. */ /* #undef JEMALLOC_THP */ +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + /* Define if operating system has alloca.h header. */ -#define JEMALLOC_HAS_ALLOCA_H 1 +#define JEMALLOC_HAS_ALLOCA_H /* C99 restrict keyword supported. */ -#define JEMALLOC_HAS_RESTRICT 1 +#define JEMALLOC_HAS_RESTRICT /* For use by hash code. */ /* #undef JEMALLOC_BIG_ENDIAN */ @@ -351,7 +390,7 @@ /* * If defined, all the features necessary for background threads are present. */ -#define JEMALLOC_BACKGROUND_THREAD 1 +#define JEMALLOC_BACKGROUND_THREAD /* * If defined, jemalloc symbols are not exported (doesn't work when @@ -360,20 +399,29 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "" +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ -#define JEMALLOC_IS_MALLOC 1 +#define JEMALLOC_IS_MALLOC /* * Defined if strerror_r returns char * if _GNU_SOURCE is defined. */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE -/* - * popcount*() functions to use for bitmapping. - */ -#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl -#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. 
*/ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h deleted file mode 100644 index d79551e1f25..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h +++ /dev/null @@ -1,216 +0,0 @@ -#ifndef JEMALLOC_PREAMBLE_H -#define JEMALLOC_PREAMBLE_H - -#include "jemalloc_internal_defs.h" -#include "jemalloc/internal/jemalloc_internal_decls.h" - -#ifdef JEMALLOC_UTRACE -#include -#endif - -#define JEMALLOC_NO_DEMANGLE -#ifdef JEMALLOC_JET -# undef JEMALLOC_IS_MALLOC -# define JEMALLOC_N(n) jet_##n -# include "jemalloc/internal/public_namespace.h" -# define JEMALLOC_NO_RENAME -# include "jemalloc/jemalloc.h" -# undef JEMALLOC_NO_RENAME -#else -# define JEMALLOC_N(n) je_##n -# include "jemalloc/jemalloc.h" -#endif - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include -#endif - -#ifdef JEMALLOC_ZONE -#include -#include -#include -#endif - -#include "jemalloc/internal/jemalloc_internal_macros.h" - -/* - * Note that the ordering matters here; the hook itself is name-mangled. We - * want the inclusion of hooks to happen early, so that we hook as much as - * possible. - */ -#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE -# ifndef JEMALLOC_JET -# include "jemalloc/internal/private_namespace.h" -# else -# include "jemalloc/internal/private_namespace_jet.h" -# endif -#endif -#include "jemalloc/internal/test_hooks.h" - -#ifdef JEMALLOC_DEFINE_MADVISE_FREE -# define JEMALLOC_MADV_FREE 8 -#endif - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; -static const bool have_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; -static const bool have_madvise_huge = -#ifdef JEMALLOC_HAVE_MADVISE_HUGE - true -#else - false -#endif - ; -static const bool config_fill = -#ifdef JEMALLOC_FILL - true -#else - false -#endif - ; -static const bool config_lazy_lock = -#ifdef JEMALLOC_LAZY_LOCK - true -#else - false -#endif - ; -static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; -static const bool config_prof = -#ifdef JEMALLOC_PROF - true -#else - false -#endif - ; -static const bool config_prof_libgcc = -#ifdef JEMALLOC_PROF_LIBGCC - true -#else - false -#endif - ; -static const bool config_prof_libunwind = -#ifdef JEMALLOC_PROF_LIBUNWIND - true -#else - false -#endif - ; -static const bool maps_coalesce = -#ifdef JEMALLOC_MAPS_COALESCE - true -#else - false -#endif - ; -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; -static const bool config_tls = -#ifdef JEMALLOC_TLS - true -#else - false -#endif - ; -static const bool config_utrace = -#ifdef JEMALLOC_UTRACE - true -#else - false -#endif - ; -static const bool config_xmalloc = -#ifdef JEMALLOC_XMALLOC - true -#else - false -#endif - ; -static const bool config_cache_oblivious = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - true -#else - false -#endif - ; -/* - * Undocumented, for jemalloc development use only at the moment. See the note - * in jemalloc/internal/log.h. - */ -static const bool config_log = -#ifdef JEMALLOC_LOG - true -#else - false -#endif - ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU -/* Currently percpu_arena depends on sched_getcpu. 
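
Every config_* constant in the preamble above surfaces through the read-only "config.*" mallctl namespace, so a build can be inspected at run time. A minimal sketch, assuming the standard public API:

#include <stdbool.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int main(void) {
    bool stats;
    size_t sz = sizeof(stats);
    if (mallctl("config.stats", &stats, &sz, NULL, 0) == 0)
        printf("built with JEMALLOC_STATS: %s\n", stats ? "yes" : "no");
    return 0;
}
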
*/ -#define JEMALLOC_PERCPU_ARENA -#endif -static const bool have_percpu_arena = -#ifdef JEMALLOC_PERCPU_ARENA - true -#else - false -#endif - ; -/* - * Undocumented, and not recommended; the application should take full - * responsibility for tracking provenance. - */ -static const bool force_ivsalloc = -#ifdef JEMALLOC_FORCE_IVSALLOC - true -#else - false -#endif - ; -static const bool have_background_thread = -#ifdef JEMALLOC_BACKGROUND_THREAD - true -#else - false -#endif - ; - -#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 -#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 - -/* - * Are extra safety checks enabled; things like checking the size of sized - * deallocations, double-frees, etc. - */ -static const bool config_opt_safety_checks = -#ifdef JEMALLOC_OPT_SAFETY_CHECKS - true -#elif defined(JEMALLOC_DEBUG) - /* - * This lets us only guard safety checks by one flag instead of two; fast - * checks can guard solely by config_opt_safety_checks and run in debug mode - * too. - */ - true -#else - false -#endif - ; - -#endif /* JEMALLOC_PREAMBLE_H */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h deleted file mode 100644 index 34235894285..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include -#include -#include -#include - -#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 2 -#define JEMALLOC_VERSION_BUGFIX 1 -#define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 - -#define MALLOCX_LG_ALIGN(la) ((int)(la)) -#if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) -#else -# define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) -#endif -#define MALLOCX_ZERO ((int)0x40) -/* - * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 - * encodes MALLOCX_TCACHE_NONE. - */ -#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) -/* - * Bias arena index bits so that 0 encodes "use an automatically chosen arena". - */ -#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) - -/* - * Use as arena index in "arena..{purge,decay,dss}" and - * "stats.arenas..*" mallctl interfaces to select all arenas. This - * definition is intentionally specified in raw decimal format to support - * cpp-based string concatenation, e.g. - * - * #define STRINGIFY_HELPER(x) #x - * #define STRINGIFY(x) STRINGIFY_HELPER(x) - * - * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, - * 0); - */ -#define MALLCTL_ARENAS_ALL 4096 -/* - * Use as arena index in "stats.arenas..*" mallctl interfaces to select - * destroyed arenas. 
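
A sketch of composing the MALLOCX_* flags defined above (the size and alignment are illustrative):

#include <jemalloc/jemalloc.h>

void *aligned_zeroed_4k(void) {
    /* 4 KiB, 64-byte aligned, zero-filled; default tcache and arena. */
    return mallocx(4096, MALLOCX_ALIGN(64) | MALLOCX_ZERO);
}
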
- */ -#define MALLCTL_ARENAS_DESTROYED 4097 - -#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) -# define JEMALLOC_CXX_THROW throw() -#else -# define JEMALLOC_CXX_THROW -#endif - -#if defined(_MSC_VER) -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# ifndef JEMALLOC_EXPORT -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# endif -# define JEMALLOC_FORMAT_ARG(i) -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE __declspec(noinline) -# ifdef __cplusplus -# define JEMALLOC_NOTHROW __declspec(nothrow) -# else -# define JEMALLOC_NOTHROW -# endif -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) -# if _MSC_VER >= 1900 && !defined(__EDG__) -# define JEMALLOC_ALLOCATOR __declspec(allocator) -# else -# define JEMALLOC_ALLOCATOR -# endif -#elif defined(JEMALLOC_HAVE_ATTR) -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG -# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) -# else -# define JEMALLOC_FORMAT_ARG(i) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# define JEMALLOC_EXPORT -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE -# define JEMALLOC_NOTHROW -# define JEMALLOC_SECTION(s) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#endif diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h deleted file mode 100644 index ff025e30fa7..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * The je_ prefix on the following public symbol declarations is an artifact - * of namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). 
- */ -extern JEMALLOC_EXPORT const char *je_malloc_conf; -extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, - const char *s); - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_malloc(size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_calloc(size_t num, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_posix_memalign(void **memptr, - size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_aligned_alloc(size_t alignment, - size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) - JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_realloc(void *ptr, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) - JEMALLOC_CXX_THROW; - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_mallocx(size_t size, int flags) - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_rallocx(void *ptr, size_t size, - int flags) JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, - size_t extra, int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void *ptr, - int flags) JEMALLOC_ATTR(pure); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, - int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) - JEMALLOC_ATTR(pure); - -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, - size_t *mibp, size_t *miblenp); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( - void (*write_cb)(void *, const char *), void *je_cbopaque, - const char *opts); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW - JEMALLOC_ATTR(malloc); -#endif diff --git a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..684d4debb14 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,428 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. 
*/ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 1 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/// musl doesn't support it +/* Defined if pthread_getname_np(3) is available. */ +/* #define JEMALLOC_HAVE_PTHREAD_GETNAME_NP */ + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. 
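
On this x86_64-musl target CPU_SPINWAIT expands to the PAUSE hint. A sketch of the spin-loop pattern it exists for, using C11 atomics (the flag is illustrative):

#include <stdatomic.h>

#define CPU_SPINWAIT __asm__ volatile("pause")

void spin_until_set(atomic_bool *flag) {
    while (!atomic_load_explicit(flag, memory_order_acquire))
        CPU_SPINWAIT; /* yield execution resources to the sibling hyper-thread */
}
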
At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. 
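
The initial-exec model selected by JEMALLOC_TLS_MODEL above makes each thread-local read a fixed offset from the thread pointer, at the cost of not being usable from a dlopen'ed library. Applying it looks like this (the variable name is illustrative; GCC/Clang syntax):

static __thread void *tsd_cache __attribute__((tls_model("initial-exec")));

void *get_tsd_cache(void) {
    return tsd_cache;
}
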
*/ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. 
+ * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. 
*/ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/kvproto b/contrib/kvproto index 12e2f5a9d16..a5d4ffd2ba3 160000 --- a/contrib/kvproto +++ b/contrib/kvproto @@ -1 +1 @@ -Subproject commit 12e2f5a9d167f46602804840857ddc8ff06dc695 +Subproject commit a5d4ffd2ba337dad0bc99e9fb53bf665864a3f3b diff --git a/contrib/prometheus-cpp b/contrib/prometheus-cpp index ca1f3463e74..76470b3ec02 160000 --- a/contrib/prometheus-cpp +++ b/contrib/prometheus-cpp @@ -1 +1 @@ -Subproject commit ca1f3463e74d957d1cccddd4a1a29e3e5d34bd83 +Subproject commit 76470b3ec024c8214e1f4253fb1f4c0b28d3df94 diff --git a/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt b/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt index daebd1b7c5a..993618e16ac 100644 --- a/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt +++ b/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt @@ -12,9 +12,18 @@ if(ENABLE_COMPRESSION) endif() add_library(pull + ${PROMETHEUS_SRC_DIR}/pull/src/basic_auth.cc + ${PROMETHEUS_SRC_DIR}/pull/src/basic_auth.h + ${PROMETHEUS_SRC_DIR}/pull/src/endpoint.cc + ${PROMETHEUS_SRC_DIR}/pull/src/endpoint.h ${PROMETHEUS_SRC_DIR}/pull/src/exposer.cc ${PROMETHEUS_SRC_DIR}/pull/src/handler.cc ${PROMETHEUS_SRC_DIR}/pull/src/handler.h + ${PROMETHEUS_SRC_DIR}/pull/src/metrics_collector.cc + ${PROMETHEUS_SRC_DIR}/pull/src/metrics_collector.h + + ${PROMETHEUS_SRC_DIR}/pull/src/detail/base64.h + $<$:$> ) diff --git a/contrib/prometheus-cpp-cmake/push/CMakeLists.txt b/contrib/prometheus-cpp-cmake/push/CMakeLists.txt index 71dad9fb812..b776d17bdaf 100644 --- a/contrib/prometheus-cpp-cmake/push/CMakeLists.txt +++ b/contrib/prometheus-cpp-cmake/push/CMakeLists.txt @@ -3,6 +3,8 @@ if(NOT CURL_FOUND) endif() add_library(push + ${PROMETHEUS_SRC_DIR}/push/src/curl_wrapper.cc + ${PROMETHEUS_SRC_DIR}/push/src/curl_wrapper.h ${PROMETHEUS_SRC_DIR}/push/src/gateway.cc ) diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index ca2f51f94e5..573efc6d3d1 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit ca2f51f94e55bdd23749dcc02ab4afb94eeb5ae5 +Subproject commit 573efc6d3d155a9a01da003e70f111485becf2bc diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index e243ecba37c..e3e2df379a1 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -4,7 +4,11 @@ file(GLOB_RECURSE _TIFLASH_PROXY_SRCS "${_TIFLASH_PROXY_SOURCE_DIR}/*.rs") list(FILTER _TIFLASH_PROXY_SRCS EXCLUDE REGEX ${_TIFLASH_PROXY_SOURCE_DIR}/target/.*) # use `CFLAGS=-w CXXFLAGS=-w` to inhibit warning messages. 
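The jemalloc configuration header above pins these capabilities at configure time. To make one of them concrete: CPU_SPINWAIT is the instruction a busy-wait loop issues between polls so a hyper-threaded sibling can make progress. A self-contained sketch of that pattern (illustrative only, not jemalloc's internal code):

```cpp
#include <atomic>

#if defined(__x86_64__) || defined(__i386__)
#define CPU_SPINWAIT __asm__ volatile("pause")
#else
#define CPU_SPINWAIT // e.g. "yield"/"isb" on aarch64; empty when unknown
#endif

// Busy-wait until `flag` becomes true, yielding pipeline resources to the
// sibling hyper-thread on every iteration.
inline void spinUntil(const std::atomic<bool> & flag)
{
    while (!flag.load(std::memory_order_acquire))
        CPU_SPINWAIT;
}
```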
-set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} CFLAGS=-w CXXFLAGS=-w) +if (TIFLASH_LLVM_TOOLCHAIN) + set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} "CFLAGS=-w -fuse-ld=lld" "CXXFLAGS=-w -fuse-ld=lld -stdlib=libc++") +else() + set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} CFLAGS=-w CXXFLAGS=-w) +endif() if(TIFLASH_LLVM_TOOLCHAIN AND USE_LIBCXX) set(TIFLASH_RUST_LINKER ${CMAKE_CURRENT_BINARY_DIR}/tiflash-linker) diff --git a/contrib/tipb b/contrib/tipb index bfb5c2c5518..0f4f873beca 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit bfb5c2c55188c254018d3cf77bfad73b4d4b77ec +Subproject commit 0f4f873beca8d5078dde0a23d15ad5ce3188ed0d diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 27283c0f24a..da071507a72 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -233,7 +233,8 @@ class ColumnConst final : public COWPtrHelper<IColumn, ColumnConst> template <typename T> T getValue() const { - return getField().safeGet<typename NearestFieldType<T>::Type>(); + auto && tmp = getField(); + return std::move(tmp.safeGet<typename NearestFieldType<T>::Type>()); } }; diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index b7ce9fd1e89..8673784c590 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -22,6 +22,8 @@ M(OpenFileForReadWrite) \ M(MemoryTracking) \ M(MemoryTrackingInBackgroundProcessingPool) \ + M(LogicalCPUCores) \ + M(MemoryCapacity) \ M(PSMVCCNumSnapshots) \ M(PSMVCCSnapshotsList) \ M(RWLockWaitingReaders) \ diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index c6c3caa44ad..ad5010d7826 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -12,7 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License.
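The ColumnConst::getValue() hunk above deserves a note: getField() returns a temporary Field, and safeGet() returns a reference into it, so the old one-liner could only copy the payload out. Binding the temporary to a forwarding reference extends its lifetime and lets the payload be moved instead. A minimal sketch with a hypothetical Field type (not TiFlash's real one):

```cpp
#include <string>
#include <utility>

struct Field
{
    std::string payload;
    std::string & safeGet() { return payload; } // reference into *this, like DB::Field::safeGet
};

Field getField() { return Field{std::string(1024, 'x')}; } // returns a temporary

std::string getValueOld()
{
    return getField().safeGet(); // lvalue reference: the payload is copied
}

std::string getValueNew()
{
    auto && tmp = getField();        // lifetime extension keeps the temporary alive
    return std::move(tmp.safeGet()); // the payload is moved out before tmp dies
}
```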
+#include #include +#include +#include +#include +#include +#include #include #include @@ -21,7 +27,6 @@ namespace DB { std::unordered_map> FailPointHelper::fail_point_wait_channels; - #define APPLY_FOR_FAILPOINTS_ONCE(M) \ M(exception_between_drop_meta_and_data) \ M(exception_between_alter_data_and_meta) \ @@ -85,33 +90,54 @@ std::unordered_map> FailPointHelper::f M(force_remote_read_for_batch_cop) \ M(force_context_path) \ M(force_slow_page_storage_snapshot_release) \ - M(force_change_all_blobs_to_read_only) - -#define APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) \ - M(pause_with_alter_locks_acquired) \ - M(hang_in_execution) \ - M(pause_before_dt_background_delta_merge) \ - M(pause_until_dt_background_delta_merge) \ - M(pause_before_apply_raft_cmd) \ - M(pause_before_apply_raft_snapshot) \ - M(pause_until_apply_raft_snapshot) \ + M(force_change_all_blobs_to_read_only) \ + M(unblock_query_init_after_write) + + +#define APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) \ + M(pause_with_alter_locks_acquired) \ + M(hang_in_execution) \ + M(pause_before_dt_background_delta_merge) \ + M(pause_until_dt_background_delta_merge) \ + M(pause_before_apply_raft_cmd) \ + M(pause_before_apply_raft_snapshot) \ + M(pause_until_apply_raft_snapshot) \ M(pause_after_copr_streams_acquired_once) -#define APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) \ - M(pause_when_reading_from_dt_stream) \ - M(pause_when_writing_to_dt_store) \ - M(pause_when_ingesting_to_dt_store) \ - M(pause_when_altering_dt_store) \ - M(pause_after_copr_streams_acquired) \ - M(pause_before_server_merge_one_delta) +#define APPLY_FOR_PAUSEABLE_FAILPOINTS(M) \ + M(pause_when_reading_from_dt_stream) \ + M(pause_when_writing_to_dt_store) \ + M(pause_when_ingesting_to_dt_store) \ + M(pause_when_altering_dt_store) \ + M(pause_after_copr_streams_acquired) \ + M(pause_before_server_merge_one_delta) \ + M(pause_query_init) + + +#define APPLY_FOR_RANDOM_FAILPOINTS(M) \ + M(random_tunnel_wait_timeout_failpoint) \ + M(random_tunnel_init_rpc_failure_failpoint) \ + M(random_receiver_sync_msg_push_failure_failpoint) \ + M(random_receiver_async_msg_push_failure_failpoint) \ + M(random_limit_check_failpoint) \ + M(random_join_build_failpoint) \ + M(random_join_prob_failpoint) \ + M(random_aggregate_create_state_failpoint) \ + M(random_aggregate_merge_failpoint) \ + M(random_sharedquery_failpoint) \ + M(random_interpreter_failpoint) \ + M(random_task_lifecycle_failpoint) \ + M(random_task_manager_find_task_failure_failpoint) \ + M(random_min_tso_scheduler_failpoint) namespace FailPoints { #define M(NAME) extern const char(NAME)[] = #NAME ""; APPLY_FOR_FAILPOINTS_ONCE(M) APPLY_FOR_FAILPOINTS(M) -APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) -APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) +APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) +APPLY_FOR_PAUSEABLE_FAILPOINTS(M) +APPLY_FOR_RANDOM_FAILPOINTS(M) #undef M } // namespace FailPoints @@ -167,15 +193,15 @@ void FailPointHelper::enableFailPoint(const String & fail_point_name) } #define M(NAME) SUB_M(NAME, FIU_ONETIME) - APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) + APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) #undef M #define M(NAME) SUB_M(NAME, 0) - APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) + APPLY_FOR_PAUSEABLE_FAILPOINTS(M) #undef M #undef SUB_M - throw Exception("Cannot find fail point " + fail_point_name, ErrorCodes::FAIL_POINT_ERROR); + throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR); } void FailPointHelper::disableFailPoint(const String & fail_point_name) @@ -200,6 +226,41 @@ void 
FailPointHelper::wait(const String & fail_point_name) ptr->wait(); } } + +void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) +{ + String random_fail_point_cfg = config.getString("flash.random_fail_points", ""); + if (random_fail_point_cfg.empty()) + return; + + Poco::StringTokenizer string_tokens(random_fail_point_cfg, ","); + for (const auto & string_token : string_tokens) + { + Poco::StringTokenizer pair_tokens(string_token, "-"); + RUNTIME_ASSERT((pair_tokens.count() == 2), log, "RandomFailPoints config should be in FailPointA-RatioA,FailPointB-RatioB,... format"); + double rate = atof(pair_tokens[1].c_str()); //NOLINT(cert-err34-c): check conversion error manually + RUNTIME_ASSERT((0 <= rate && rate <= 1.0), log, "RandomFailPoint trigger rate should be in [0,1], got {}", rate); + enableRandomFailPoint(pair_tokens[0], rate); + } + LOG_FMT_INFO(log, "Enable RandomFailPoints: {}", random_fail_point_cfg); +} + +void FailPointHelper::enableRandomFailPoint(const String & fail_point_name, double rate) +{ +#define SUB_M(NAME) \ + if (fail_point_name == FailPoints::NAME) \ + { \ + fiu_enable_random(FailPoints::NAME, 1, nullptr, 0, rate); \ + return; \ + } + +#define M(NAME) SUB_M(NAME) + APPLY_FOR_RANDOM_FAILPOINTS(M) +#undef M +#undef SUB_M + + throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR); +} #else class FailPointChannel { @@ -210,6 +271,10 @@ void FailPointHelper::enableFailPoint(const String &) {} void FailPointHelper::disableFailPoint(const String &) {} void FailPointHelper::wait(const String &) {} + +void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration &, Poco::Logger *) {} + +void FailPointHelper::enableRandomFailPoint(const String &, double) {} #endif } // namespace DB diff --git a/dbms/src/Common/FailPoint.h b/dbms/src/Common/FailPoint.h index 2cf40ad55e4..31df2dbdcd2 100644 --- a/dbms/src/Common/FailPoint.h +++ b/dbms/src/Common/FailPoint.h @@ -21,6 +21,15 @@ #include +namespace Poco +{ +class Logger; +namespace Util +{ +class LayeredConfiguration; +} +} // namespace Poco + namespace DB { namespace ErrorCodes @@ -35,7 +44,6 @@ extern const int FAIL_POINT_ERROR; // When `fail_point` is enabled, wait till it is disabled #define FAIL_POINT_PAUSE(fail_point) fiu_do_on(fail_point, FailPointHelper::wait(fail_point);) - class FailPointChannel; class FailPointHelper { @@ -46,6 +54,16 @@ class FailPointHelper static void wait(const String & fail_point_name); + /* + * For Server RandomFailPoint test usage. When FIU_ENABLE is defined, this function does the following work: + * 1. Return immediately if the TiFlash config has an empty flash.random_fail_points entry + * 2. Parse flash.random_fail_points, which is expected to be in "FailPointA-RatioA,FailPointB-RatioB,..." format + * 3.
Call enableRandomFailPoint with the parsed failpoint name and rate + */ + static void initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log); + + static void enableRandomFailPoint(const String & fail_point_name, double rate); + private: static std::unordered_map<String, std::shared_ptr<FailPointChannel>> fail_point_wait_channels; }; diff --git a/dbms/src/Common/MPMCQueue.h b/dbms/src/Common/MPMCQueue.h index f550ecc7ca2..e005c363eae 100644 --- a/dbms/src/Common/MPMCQueue.h +++ b/dbms/src/Common/MPMCQueue.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -74,56 +75,85 @@ class MPMCQueue destruct(getObj(read_pos)); } - /// Block util: + // Cannot use the copy/move constructors, + // because an MPMCQueue may be shared by multiple threads; + // copying or moving it is dangerous. + DISALLOW_COPY_AND_MOVE(MPMCQueue); + + /// Block until: /// 1. Pop succeeds with a valid T: return true. /// 2. The queue is cancelled or finished: return false. - bool pop(T & obj) + ALWAYS_INLINE bool pop(T & obj) { - return popObj(obj); + return popObj<true>(obj); } - /// Besides all conditions mentioned at `pop`, `tryPop` will return false if `timeout` is exceeded. + /// Besides all conditions mentioned at `pop`, `popTimeout` will return false if `timeout` is exceeded. template <typename Duration> - bool tryPop(T & obj, const Duration & timeout) + ALWAYS_INLINE bool popTimeout(T & obj, const Duration & timeout) { /// std::condition_variable::wait_until will always use system_clock. auto deadline = std::chrono::system_clock::now() + timeout; - return popObj(obj, &deadline); + return popObj<true>(obj, &deadline); + } + + /// Non-blocking function. + /// Return true if the pop succeeds; + /// otherwise return false. + ALWAYS_INLINE bool tryPop(T & obj) + { + return popObj<false>(obj); } - /// Block util: + /// Block until: /// 1. Push succeeds and return true. /// 2. The queue is cancelled and return false. /// 3. The queue has finished and return false. template <typename U> ALWAYS_INLINE bool push(U && u) { - return pushObj(std::forward<U>(u)); + return pushObj<true>(std::forward<U>(u)); } - /// Besides all conditions mentioned at `push`, `tryPush` will return false if `timeout` is exceeded. + /// Besides all conditions mentioned at `push`, `pushTimeout` will return false if `timeout` is exceeded. template <typename U, typename Duration> - ALWAYS_INLINE bool tryPush(U && u, const Duration & timeout) + ALWAYS_INLINE bool pushTimeout(U && u, const Duration & timeout) { /// std::condition_variable::wait_until will always use system_clock. auto deadline = std::chrono::system_clock::now() + timeout; - return pushObj(std::forward<U>(u), &deadline); + return pushObj<true>(std::forward<U>(u), &deadline); + } + + /// Non-blocking function. + /// Return true if the push succeeds; + /// otherwise return false. + template <typename U> + ALWAYS_INLINE bool tryPush(U && u) + { + return pushObj<false>(std::forward<U>(u)); } /// The same as `push` except it will construct the object in place. template <typename... Args> ALWAYS_INLINE bool emplace(Args &&... args) { - return emplaceObj(nullptr, std::forward<Args>(args)...); + return emplaceObj<true>(nullptr, std::forward<Args>(args)...); } - /// The same as `tryPush` except it will construct the object in place. + /// The same as `pushTimeout` except it will construct the object in place. template <typename... Args, typename Duration> - ALWAYS_INLINE bool tryEmplace(Args &&... args, const Duration & timeout) + ALWAYS_INLINE bool emplaceTimeout(Args &&... args, const Duration & timeout) { /// std::condition_variable::wait_until will always use system_clock.
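Taken together, each direction of the queue now has three tiers: blocking (pop/push), deadline-bounded (popTimeout/pushTimeout, the old two-argument tryPop/tryPush), and truly non-blocking (the new tryPop/tryPush). A usage sketch; the wrapper function is hypothetical, while DB::MPMCQueue and its header are the ones this hunk touches:

```cpp
#include <Common/MPMCQueue.h> // TiFlash header
#include <chrono>

void queueTiersSketch(DB::MPMCQueue<int> & queue)
{
    // Blocks until the push succeeds or the queue is cancelled/finished.
    bool pushed = queue.push(1);

    // Additionally gives up once the timeout expires (formerly `tryPush` with a timeout).
    bool pushed_in_time = queue.pushTimeout(2, std::chrono::milliseconds(10));

    // Fails immediately when the queue is full (the new non-blocking `tryPush`).
    bool pushed_now = queue.tryPush(3);

    int v = 0;
    bool popped = queue.pop(v);                                               // blocking
    bool popped_in_time = queue.popTimeout(v, std::chrono::milliseconds(10)); // bounded wait
    bool popped_now = queue.tryPop(v);                                        // non-blocking

    (void)pushed; (void)pushed_in_time; (void)pushed_now;
    (void)popped; (void)popped_in_time; (void)popped_now;
}
```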
auto deadline = std::chrono::system_clock::now() + timeout; - return emplaceObj(&deadline, std::forward<Args>(args)...); + return emplaceObj<true>(&deadline, std::forward<Args>(args)...); + } + + /// The same as `tryPush` except it will construct the object in place. + template <typename... Args> + ALWAYS_INLINE bool tryEmplace(Args &&... args) + { + return emplaceObj<false>(nullptr, std::forward<Args>(args)...); } /// Cancelling a NORMAL queue will wake up all blocking readers and writers. @@ -233,7 +263,8 @@ class MPMCQueue } } - bool popObj(T & res, const TimePoint * deadline = nullptr) + template <bool need_wait> + bool popObj(T & res, [[maybe_unused]] const TimePoint * deadline = nullptr) { #ifdef __APPLE__ WaitingNode node; @@ -241,14 +272,16 @@ class MPMCQueue thread_local WaitingNode node; #endif { - /// read_pos < write_pos means the queue isn't empty - auto pred = [&] { - return read_pos < write_pos || !isNormal(); - }; - std::unique_lock lock(mu); - wait(lock, reader_head, node, pred, deadline); + if constexpr (need_wait) + { + /// read_pos < write_pos means the queue isn't empty + auto pred = [&] { + return read_pos < write_pos || !isNormal(); + }; + wait(lock, reader_head, node, pred, deadline); + } if (!isCancelled() && read_pos < write_pos) { @@ -272,21 +305,23 @@ class MPMCQueue return false; } - template <typename F> - bool assignObj(const TimePoint * deadline, F && assigner) + template <bool need_wait, typename F> + bool assignObj([[maybe_unused]] const TimePoint * deadline, F && assigner) { #ifdef __APPLE__ WaitingNode node; #else thread_local WaitingNode node; #endif - auto pred = [&] { - return write_pos - read_pos < capacity || !isNormal(); - }; - std::unique_lock lock(mu); - wait(lock, writer_head, node, pred, deadline); + if constexpr (need_wait) + { + auto pred = [&] { + return write_pos - read_pos < capacity || !isNormal(); + }; + wait(lock, writer_head, node, pred, deadline); + } /// double check status after potential wait /// check write_pos because a timed-out waiter will also reach here. @@ -305,16 +340,16 @@ class MPMCQueue return false; } - template <typename U> + template <bool need_wait, typename U> ALWAYS_INLINE bool pushObj(U && u, const TimePoint * deadline = nullptr) { - return assignObj(deadline, [&](void * addr) { new (addr) T(std::forward<U>(u)); }); + return assignObj<need_wait>(deadline, [&](void * addr) { new (addr) T(std::forward<U>(u)); }); } - template <typename... Args> + template <bool need_wait, typename... Args> ALWAYS_INLINE bool emplaceObj(const TimePoint * deadline, Args &&... args) { - return assignObj(deadline, [&](void * addr) { new (addr) T(std::forward<Args>(args)...); }); + return assignObj<need_wait>(deadline, [&](void * addr) { new (addr) T(std::forward<Args>(args)...); }); } ALWAYS_INLINE bool isNormal() const diff --git a/dbms/src/Common/MyDuration.cpp b/dbms/src/Common/MyDuration.cpp index 8801ae0de44..513c40b6dbc 100644 --- a/dbms/src/Common/MyDuration.cpp +++ b/dbms/src/Common/MyDuration.cpp @@ -67,4 +67,4 @@ String MyDuration::toString() const auto frac_str = fmt::format("{:06}", microsecond); return fmt::format(fmt_str, sign > 0 ?
"" : "-", hour, minute, second, frac_str); } -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h index aced9fced11..d33be52fbd6 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -23,21 +24,28 @@ #include #endif - -namespace StopWatchDetail -{ -inline UInt64 nanoseconds(clockid_t clock_type) +inline UInt64 clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) { - struct timespec ts; + struct timespec ts + { + }; clock_gettime(clock_type, &ts); - return ts.tv_sec * 1000000000ULL + ts.tv_nsec; + return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec); } -inline UInt64 seconds(clockid_t clock_type) + +/// Sometimes monotonic clock may not be monotonic (due to bug in kernel?). +/// It may cause some operations to fail with "Timeout exceeded: elapsed 18446744073.709553 seconds". +/// Takes previously returned value and returns it again if time stepped back for some reason. +inline UInt64 clock_gettime_ns_adjusted(UInt64 prev_time, clockid_t clock_type = CLOCK_MONOTONIC) { - return nanoseconds(clock_type) / 1000000000ULL; -} -} // namespace StopWatchDetail + UInt64 current_time = clock_gettime_ns(clock_type); + if (likely(prev_time <= current_time)) + return current_time; + /// Something probably went completely wrong if time stepped back for more than 1 second. + assert(prev_time - current_time <= 1000000000ULL); + return prev_time; +} /** Differs from Poco::Stopwatch only by using 'clock_gettime' instead of 'gettimeofday', * returns nanoseconds instead of microseconds, and also by other minor differencies. @@ -104,7 +112,7 @@ class Stopwatch clockid_t clock_type; bool is_running = false; - UInt64 nanoseconds() const { return StopWatchDetail::nanoseconds(clock_type); } + UInt64 nanoseconds() const { return clock_gettime_ns_adjusted(start_ns, clock_type); } }; @@ -112,13 +120,18 @@ class AtomicStopwatch { public: explicit AtomicStopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) - : clock_type(clock_type_) + : start_ns(0) + , clock_type(clock_type_) { restart(); } - void restart() { start_ns = nanoseconds(); } - UInt64 elapsed() const { return nanoseconds() - start_ns; } + void restart() { start_ns = nanoseconds(0); } + UInt64 elapsed() const + { + UInt64 current_start_ns = start_ns; + return nanoseconds(current_start_ns) - start_ns; + } UInt64 elapsedMilliseconds() const { return elapsed() / 1000000UL; } double elapsedSeconds() const { return static_cast(elapsed()) / 1000000000ULL; } @@ -129,8 +142,8 @@ class AtomicStopwatch bool compareAndRestart(double seconds) { UInt64 threshold = seconds * 1000000000ULL; - UInt64 current_ns = nanoseconds(); UInt64 current_start_ns = start_ns; + UInt64 current_ns = nanoseconds(current_start_ns); while (true) { @@ -175,8 +188,8 @@ class AtomicStopwatch Lock compareAndRestartDeferred(double seconds) { UInt64 threshold = seconds * 1000000000ULL; - UInt64 current_ns = nanoseconds(); UInt64 current_start_ns = start_ns; + UInt64 current_ns = nanoseconds(current_start_ns); while (true) { @@ -197,5 +210,5 @@ class AtomicStopwatch clockid_t clock_type; /// Most significant bit is a lock. When it is set, compareAndRestartDeferred method will return false. 
- UInt64 nanoseconds() const { return StopWatchDetail::nanoseconds(clock_type) & 0x7FFFFFFFFFFFFFFFULL; } + UInt64 nanoseconds(UInt64 prev_time) const { return clock_gettime_ns_adjusted(prev_time, clock_type) & 0x7FFFFFFFFFFFFFFFULL; } }; diff --git a/dbms/src/Common/ThreadMetricUtil.cpp b/dbms/src/Common/ThreadMetricUtil.cpp index aa496b943ab..340417b969b 100644 --- a/dbms/src/Common/ThreadMetricUtil.cpp +++ b/dbms/src/Common/ThreadMetricUtil.cpp @@ -24,7 +24,7 @@ namespace DB { bool tryToResetMaxThreadsMetrics() { - UInt64 now_ts = StopWatchDetail::seconds(CLOCK_MONOTONIC); + UInt64 now_ts = clock_gettime_ns_adjusted(last_max_thds_metric_reset_ts, CLOCK_MONOTONIC); if (now_ts > last_max_thds_metric_reset_ts + max_thds_metric_reset_interval) { last_max_thds_metric_reset_ts = now_ts; diff --git a/dbms/src/Common/tests/gtest_mpmc_queue.cpp b/dbms/src/Common/tests/gtest_mpmc_queue.cpp index 85ad1892067..3f2748b452b 100644 --- a/dbms/src/Common/tests/gtest_mpmc_queue.cpp +++ b/dbms/src/Common/tests/gtest_mpmc_queue.cpp @@ -98,12 +98,14 @@ class MPMCQueueTest : public ::testing::Test void testCannotTryPush(MPMCQueue & queue) { auto old_size = queue.size(); - auto res = queue.tryPush(ValueHelper::make(-1), std::chrono::microseconds(1)); - auto new_size = queue.size(); - if (res) + bool ok1 = queue.tryPush(ValueHelper::make(-1)); + auto new_size1 = queue.size(); + bool ok2 = queue.pushTimeout(ValueHelper::make(-1), std::chrono::microseconds(1)); + auto new_size2 = queue.size(); + if (ok1 || ok2) throw TiFlashTestException("Should push fail"); - if (old_size != new_size) - throw TiFlashTestException(fmt::format("Size changed from {} to {} without push", old_size, new_size)); + if (old_size != new_size1 || old_size != new_size2) + throw TiFlashTestException(fmt::format("Size changed from {} to {} and {} without push", old_size, new_size1, new_size2)); } template @@ -124,12 +126,14 @@ class MPMCQueueTest : public ::testing::Test { auto old_size = queue.size(); T res; - bool ok = queue.tryPop(res, std::chrono::microseconds(1)); - auto new_size = queue.size(); - if (ok) + bool ok1 = queue.tryPop(res); + auto new_size1 = queue.size(); + bool ok2 = queue.popTimeout(res, std::chrono::microseconds(1)); + auto new_size2 = queue.size(); + if (ok1 || ok2) throw TiFlashTestException("Should pop fail"); - if (old_size != new_size) - throw TiFlashTestException(fmt::format("Size changed from {} to {} without pop", old_size, new_size)); + if (old_size != new_size1 || old_size != new_size2) + throw TiFlashTestException(fmt::format("Size changed from {} to {} and {} without pop", old_size, new_size1, new_size2)); } template @@ -474,7 +478,6 @@ class MPMCQueueTest : public ::testing::Test throwOrMove(std::move(rhs)); } - ThrowInjectable & operator=(ThrowInjectable && rhs) { if (this != &rhs) diff --git a/dbms/src/Common/tests/mpmc_queue_perftest.cpp b/dbms/src/Common/tests/mpmc_queue_perftest.cpp index d047b5d498f..ba0d00001a3 100644 --- a/dbms/src/Common/tests/mpmc_queue_perftest.cpp +++ b/dbms/src/Common/tests/mpmc_queue_perftest.cpp @@ -87,7 +87,7 @@ struct Helper> template static void pushOneTo(MPMCQueue & queue, U && data) { - queue.tryPush(std::forward(data), std::chrono::milliseconds(1)); + queue.pushTimeout(std::forward(data), std::chrono::milliseconds(1)); } }; diff --git a/dbms/src/Common/wrapInvocable.h b/dbms/src/Common/wrapInvocable.h index d6cee519835..1c93bb3e782 100644 --- a/dbms/src/Common/wrapInvocable.h +++ b/dbms/src/Common/wrapInvocable.h @@ -35,7 +35,6 @@ inline auto wrapInvocable(bool 
propagate_memory_tracker, Func && func, Args &&.. // run the task with the parameters provided return std::apply(std::move(func), std::move(args)); }; - return capture; } } // namespace DB diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 28db7af82e1..971e8f36e2a 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -238,10 +238,18 @@ void Block::checkNumberOfRows() const if (rows == -1) rows = size; else if (rows != size) - throw Exception("Sizes of columns doesn't match: " - + data.front().name + ": " + toString(rows) - + ", " + elem.name + ": " + toString(size), + { + auto first_col = data.front(); + throw Exception(fmt::format( + "Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}", + first_col.name, + first_col.column_id, + rows, + elem.name, + elem.column_id, + size), ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + } } } diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index f4f8dfc1338..cd9d6235f52 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -24,7 +24,7 @@ namespace DB { ParallelAggregatingBlockInputStream::ParallelAggregatingBlockInputStream( const BlockInputStreams & inputs, - const BlockInputStreamPtr & additional_input_at_end, + const BlockInputStreams & additional_inputs_at_end, const Aggregator::Params & params_, const FileProviderPtr & file_provider_, bool final_, @@ -41,11 +41,10 @@ ParallelAggregatingBlockInputStream::ParallelAggregatingBlockInputStream( , keys_size(params.keys_size) , aggregates_size(params.aggregates_size) , handler(*this) - , processor(inputs, additional_input_at_end, max_threads, handler, log) + , processor(inputs, additional_inputs_at_end, max_threads, handler, log) { children = inputs; - if (additional_input_at_end) - children.push_back(additional_input_at_end); + children.insert(children.end(), additional_inputs_at_end.begin(), additional_inputs_at_end.end()); } @@ -198,8 +197,8 @@ void ParallelAggregatingBlockInputStream::Handler::onException(std::exception_pt /// can not cancel parent inputStream or the exception might be lost if (!parent.executed) - /// kill the processor so ExchangeReceiver will be closed - parent.processor.cancel(true); + /// use cancel instead of kill to avoid too many useless error message + parent.processor.cancel(false); } diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h index 41e61786370..907622c8364 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h @@ -36,7 +36,7 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream */ ParallelAggregatingBlockInputStream( const BlockInputStreams & inputs, - const BlockInputStreamPtr & additional_input_at_end, + const BlockInputStreams & additional_inputs_at_end, const Aggregator::Params & params_, const FileProviderPtr & file_provider_, bool final_, diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 34c70a7085e..57ab37e1756 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -83,9 +84,8 @@ template class ParallelInputsProcessor { public: - /** additional_input_at_end - if 
not nullptr, - * then the blocks from this source will start to be processed only after all other sources are processed. - * This is done in the main thread. + /** additional_inputs_at_end - if not empty, + * then the blocks from the sources will start to be processed only after all other sources are processed. * * Intended for implementation of FULL and RIGHT JOIN * - where you must first make JOIN in parallel, while noting which keys are not found, @@ -93,19 +93,18 @@ class ParallelInputsProcessor */ ParallelInputsProcessor( const BlockInputStreams & inputs_, - const BlockInputStreamPtr & additional_input_at_end_, + const BlockInputStreams & additional_inputs_at_end_, size_t max_threads_, Handler & handler_, const LoggerPtr & log_) : inputs(inputs_) - , additional_input_at_end(additional_input_at_end_) - , max_threads(std::min(inputs_.size(), max_threads_)) + , additional_inputs_at_end(additional_inputs_at_end_) + , max_threads(std::min(std::max(inputs_.size(), additional_inputs_at_end_.size()), max_threads_)) , handler(handler_) + , working_inputs(inputs_) + , working_additional_inputs(additional_inputs_at_end_) , log(log_) - { - for (size_t i = 0; i < inputs_.size(); ++i) - unprepared_inputs.emplace(inputs_[i], i); - } + {} ~ParallelInputsProcessor() { @@ -132,36 +131,21 @@ class ParallelInputsProcessor /// Ask all sources to stop earlier than they run out. void cancel(bool kill) { - finish = true; + working_inputs.available_inputs.cancel(); + working_additional_inputs.available_inputs.cancel(); - for (auto & input : inputs) - { - if (IProfilingBlockInputStream * child = dynamic_cast<IProfilingBlockInputStream *>(&*input)) - { - try - { - child->cancel(kill); - } - catch (...) - { - /** If you can not ask one or more sources to stop. - * (for example, the connection is broken for distributed query processing) - * - then do not care. - */ - LOG_FMT_ERROR(log, "Exception while cancelling {}", child->getName()); - } - } - } + cancelStreams(inputs, kill); + cancelStreams(additional_inputs_at_end, kill); } /// Wait until all threads are finished, before the destructor. void wait() { - if (joined_threads) - return; if (thread_manager) + { thread_manager->wait(); - joined_threads = true; + thread_manager.reset(); + } } size_t getNumActiveThreads() const @@ -181,13 +165,78 @@ class ParallelInputsProcessor BlockInputStreamPtr in; size_t i; /// The source number (for debugging). - InputData() {} + InputData() + : i(0) + {} InputData(const BlockInputStreamPtr & in_, size_t i_) : in(in_) , i(i_) {} }; + struct WorkingInputs + { + explicit WorkingInputs(const BlockInputStreams & inputs_) + : available_inputs(inputs_.size()) + , active_inputs(inputs_.size()) + , unprepared_inputs(inputs_.size()) + { + for (size_t i = 0; i < inputs_.size(); ++i) + unprepared_inputs.emplace(inputs_[i], i); + } + /** A set of available sources that are not currently processed by any thread. + * Each thread takes one source from this set, takes a block out of the source (at this moment the source does the calculations) + * and (if the source has not run out), puts it back into the set of available sources. + * + * The question arises what is better to use: + * - the queue (just processed source will be processed the next time later than the rest) + * - stack (just processed source will be processed as soon as possible). + * + * The stack is better than the queue when you need to do work on reading one source more consecutively, + * and theoretically, this allows you to achieve more consecutive/consistent reads from the disk.
+ * + * But when using the stack, there is a problem with distributed query processing: + * data is read only from a part of the servers, and on the other servers + * a timeout occurs during send, and the request processing ends with an exception. + * + * Therefore, a queue is used. This can be improved in the future. + */ + using AvailableInputs = MPMCQueue; + AvailableInputs available_inputs; + + /// How many active input streams. + std::atomic active_inputs; + + /** For parallel preparing (readPrefix) child streams. + * First, streams are located here. + * After a stream was prepared, it is moved to "available_inputs" for reading. + */ + using UnpreparedInputs = MPMCQueue; + UnpreparedInputs unprepared_inputs; + }; + + void cancelStreams(const BlockInputStreams & streams, bool kill) + { + for (const auto & input : streams) + { + if (auto * p_child = dynamic_cast(&*input)) + { + try + { + p_child->cancel(kill); + } + catch (...) + { + /** If you can not ask one or more sources to stop. + * (for example, the connection is broken for distributed query processing) + * - then do not care. + */ + LOG_FMT_ERROR(log, "Exception while cancelling {}", p_child->getName()); + } + } + } + } + void publishPayload(BlockInputStreamPtr & stream, Block & block, size_t thread_num) { if constexpr (mode == StreamUnionMode::Basic) @@ -201,32 +250,24 @@ class ParallelInputsProcessor void thread(size_t thread_num) { - std::exception_ptr exception; + work(thread_num, working_inputs); + work(thread_num, working_additional_inputs); - try - { - while (!finish) - { - InputData unprepared_input; - { - std::lock_guard lock(unprepared_inputs_mutex); - - if (unprepared_inputs.empty()) - break; - - unprepared_input = unprepared_inputs.front(); - unprepared_inputs.pop(); - } + handler.onFinishThread(thread_num); - unprepared_input.in->readPrefix(); + if (0 == --active_threads) + { + handler.onFinish(); + } + } - { - std::lock_guard lock(available_inputs_mutex); - available_inputs.push(unprepared_input); - } - } + void work(size_t thread_num, WorkingInputs & work) + { + std::exception_ptr exception; - loop(thread_num); + try + { + loop(thread_num, work); } catch (...) { @@ -237,134 +278,63 @@ class ParallelInputsProcessor { handler.onException(exception, thread_num); } - - handler.onFinishThread(thread_num); - - /// The last thread on the output indicates that there is no more data. - if (0 == --active_threads) - { - /// And then it processes an additional source, if there is one. - if (additional_input_at_end) - { - try - { - additional_input_at_end->readPrefix(); - while (Block block = additional_input_at_end->read()) - publishPayload(additional_input_at_end, block, thread_num); - } - catch (...) - { - exception = std::current_exception(); - } - - if (exception) - { - handler.onException(exception, thread_num); - } - } - - handler.onFinish(); /// TODO If in `onFinish` or `onFinishThread` there is an exception, then std::terminate is called. - } } - void loop(size_t thread_num) + /// This function may be called in different threads. + /// If no exception occurs, we can ensure that the work is all done when the function + /// returns in any thread. + void loop(size_t thread_num, WorkingInputs & work) { - while (!finish) /// You may need to stop work earlier than all sources run out. + if (work.active_inputs == 0) { - InputData input; + return; + } - /// Select the next source. - { - std::lock_guard lock(available_inputs_mutex); + InputData input; - /// If there are no free sources, then this thread is no longer needed. 
(But other threads can work with their sources.) - if (available_inputs.empty()) - break; - - input = available_inputs.front(); + while (work.unprepared_inputs.tryPop(input)) + { + input.in->readPrefix(); - /// We remove the source from the queue of available sources. - available_inputs.pop(); - } + work.available_inputs.push(input); + } + // The condition is false when all input streams are exhausted or + // an exception occurred then the queue was cancelled. + while (work.available_inputs.pop(input)) + { /// The main work. Block block = input.in->read(); + if (block) { - if (finish) - break; - - /// If this source is not run out yet, then put the resulting block in the ready queue. + work.available_inputs.push(input); + publishPayload(input.in, block, thread_num); + } + else + { + if (0 == --work.active_inputs) { - std::lock_guard lock(available_inputs_mutex); - - if (block) - { - available_inputs.push(input); - } - else - { - if (available_inputs.empty()) - break; - } - } - - if (finish) + work.available_inputs.finish(); break; - - if (block) - publishPayload(input.in, block, thread_num); + } } } } - BlockInputStreams inputs; - BlockInputStreamPtr additional_input_at_end; + const BlockInputStreams inputs; + const BlockInputStreams additional_inputs_at_end; unsigned max_threads; Handler & handler; std::shared_ptr thread_manager; - /** A set of available sources that are not currently processed by any thread. - * Each thread takes one source from this set, takes a block out of the source (at this moment the source does the calculations) - * and (if the source is not run out), puts it back into the set of available sources. - * - * The question arises what is better to use: - * - the queue (just processed source will be processed the next time later than the rest) - * - stack (just processed source will be processed as soon as possible). - * - * The stack is better than the queue when you need to do work on reading one source more consequentially, - * and theoretically, this allows you to achieve more consequent/consistent reads from the disk. - * - * But when using the stack, there is a problem with distributed query processing: - * data is read only from a part of the servers, and on the other servers - * a timeout occurs during send, and the request processing ends with an exception. - * - * Therefore, a queue is used. This can be improved in the future. - */ - using AvailableInputs = std::queue; - AvailableInputs available_inputs; - - /** For parallel preparing (readPrefix) child streams. - * First, streams are located here. - * After a stream was prepared, it is moved to "available_inputs" for reading. - */ - using UnpreparedInputs = std::queue; - UnpreparedInputs unprepared_inputs; - - /// For operations with available_inputs. - std::mutex available_inputs_mutex; - - /// For operations with unprepared_inputs. - std::mutex unprepared_inputs_mutex; + WorkingInputs working_inputs; + WorkingInputs working_additional_inputs; /// How many sources ran out. std::atomic active_threads{0}; - /// Finish the threads work (before the sources run out). - std::atomic finish{false}; - /// Wait for the completion of all threads. 
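With the members deleted just below, the old mutex-plus-flag bookkeeping is gone entirely: scheduling state now lives in the two WorkingInputs queues and an atomic source counter, and the worker that drains the last source finishes the queue so every blocked peer wakes up and exits. A simplified, self-contained sketch of that loop (the callback parameters are illustrative, not TiFlash's signatures):

```cpp
#include <atomic>

// `Queue` is assumed to be MPMCQueue-like: pop() blocks until an element is
// available and returns false once finish()/cancel() has been called and the
// queue has drained; push() re-enqueues an element.
template <typename Source, typename Queue, typename ReadFn, typename PublishFn>
void workLoop(Queue & available_inputs, std::atomic<int> & active_inputs, ReadFn read, PublishFn publish)
{
    Source input;
    while (available_inputs.pop(input))
    {
        auto block = read(input); // the main work
        if (block)
        {
            available_inputs.push(input); // not exhausted: recycle for any worker
            publish(block);
        }
        else if (--active_inputs == 0)
        {
            available_inputs.finish(); // last source drained: release all blocked workers
        }
    }
}
```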
- std::atomic joined_threads{false}; const LoggerPtr log; }; diff --git a/dbms/src/DataStreams/SharedQueryBlockInputStream.h b/dbms/src/DataStreams/SharedQueryBlockInputStream.h index e7cece67f0b..d7c0707b5aa 100644 --- a/dbms/src/DataStreams/SharedQueryBlockInputStream.h +++ b/dbms/src/DataStreams/SharedQueryBlockInputStream.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -24,6 +25,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_sharedquery_failpoint[]; +} // namespace FailPoints + /** This block input stream is used by SharedQuery. * It enables multiple threads to read from one stream. */ @@ -136,6 +142,7 @@ class SharedQueryBlockInputStream : public IProfilingBlockInputStream in->readPrefix(); while (true) { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_sharedquery_failpoint); Block block = in->read(); // in is finished or queue is canceled if (!block || !queue.push(block)) diff --git a/dbms/src/DataStreams/SizeLimits.cpp b/dbms/src/DataStreams/SizeLimits.cpp index 7dd5e1524ba..4d1bfaae997 100644 --- a/dbms/src/DataStreams/SizeLimits.cpp +++ b/dbms/src/DataStreams/SizeLimits.cpp @@ -12,22 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include #include -#include +#include +#include +#include +#include namespace DB { +namespace FailPoints +{ +extern const char random_limit_check_failpoint[]; +} // namespace FailPoints bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const { - if (max_rows && rows > max_rows) + bool rows_exceed_limit = max_rows && rows > max_rows; + fiu_do_on(FailPoints::random_limit_check_failpoint, rows_exceed_limit = true;); + if (rows_exceed_limit) { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max rows: " + formatReadableQuantity(max_rows) - + ", current rows: " + formatReadableQuantity(rows), exception_code); + + ", current rows: " + formatReadableQuantity(rows), + exception_code); else return false; } @@ -36,7 +44,8 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max bytes: " + formatReadableSizeWithBinarySuffix(max_bytes) - + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), exception_code); + + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), + exception_code); else return false; } @@ -44,4 +53,4 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti return true; } -} +} // namespace DB diff --git a/dbms/src/DataStreams/TiRemoteBlockInputStream.h b/dbms/src/DataStreams/TiRemoteBlockInputStream.h index f249bf1a0dc..c1afb1e9f4e 100644 --- a/dbms/src/DataStreams/TiRemoteBlockInputStream.h +++ b/dbms/src/DataStreams/TiRemoteBlockInputStream.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,11 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream uint64_t total_rows; + // For fine grained shuffle, the sender will partition data into multiple streams by hashing. + // ExchangeReceiverBlockInputStream only needs to read its own stream, i.e., streams[stream_id]. + // CoprocessorBlockInputStream doesn't take care of this.
+ size_t stream_id; + void initRemoteExecutionSummaries(tipb::SelectResponse & resp, size_t index) { for (const auto & execution_summary : resp.execution_summaries()) @@ -120,7 +126,7 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream bool fetchRemoteResult() { - auto result = remote_reader->nextResult(block_queue, sample_block); + auto result = remote_reader->nextResult(block_queue, sample_block, stream_id); if (result.meet_error) { LOG_FMT_WARNING(log, "remote reader meets error: {}", result.error_msg); @@ -168,29 +174,22 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream } public: - TiRemoteBlockInputStream(std::shared_ptr remote_reader_, const String & req_id, const String & executor_id) + TiRemoteBlockInputStream(std::shared_ptr remote_reader_, const String & req_id, const String & executor_id, size_t stream_id_) : remote_reader(remote_reader_) , source_num(remote_reader->getSourceNum()) , name(fmt::format("TiRemoteBlockInputStream({})", RemoteReader::name)) , execution_summaries_inited(source_num) , log(Logger::get(name, req_id, executor_id)) , total_rows(0) + , stream_id(stream_id_) { - // generate sample block - ColumnsWithTypeAndName columns; - for (auto & dag_col : remote_reader->getOutputSchema()) - { - auto tp = getDataTypeByColumnInfoForComputingLayer(dag_col.second); - ColumnWithTypeAndName col(tp, dag_col.first); - columns.emplace_back(col); - } - for (size_t i = 0; i < source_num; i++) + for (size_t i = 0; i < source_num; ++i) { execution_summaries_inited[i].store(false); } execution_summaries.resize(source_num); connection_profile_infos.resize(source_num); - sample_block = Block(columns); + sample_block = Block(getColumnWithTypeAndName(toNamesAndTypes(remote_reader->getOutputSchema()))); } Block getHeader() const override { return sample_block; } diff --git a/dbms/src/DataStreams/UnionBlockInputStream.h b/dbms/src/DataStreams/UnionBlockInputStream.h index 251d0663e14..ffcc8d77c10 100644 --- a/dbms/src/DataStreams/UnionBlockInputStream.h +++ b/dbms/src/DataStreams/UnionBlockInputStream.h @@ -94,20 +94,19 @@ class UnionBlockInputStream final : public IProfilingBlockInputStream public: UnionBlockInputStream( BlockInputStreams inputs, - BlockInputStreamPtr additional_input_at_end, + BlockInputStreams additional_inputs_at_end, size_t max_threads, const String & req_id, ExceptionCallback exception_callback_ = ExceptionCallback()) - : output_queue(std::min(inputs.size(), max_threads) * 5) // reduce contention + : output_queue(std::min(std::max(inputs.size(), additional_inputs_at_end.size()), max_threads) * 5) // reduce contention , log(Logger::get(NAME, req_id)) , handler(*this) - , processor(inputs, additional_input_at_end, max_threads, handler, log) + , processor(inputs, additional_inputs_at_end, max_threads, handler, log) , exception_callback(exception_callback_) { // TODO: assert capacity of output_queue is not less than processor.getMaxThreads() children = inputs; - if (additional_input_at_end) - children.push_back(additional_input_at_end); + children.insert(children.end(), additional_inputs_at_end.begin(), additional_inputs_at_end.end()); size_t num_children = children.size(); if (num_children > 1) @@ -293,8 +292,8 @@ class UnionBlockInputStream final : public IProfilingBlockInputStream /// and the exception is lost. 
output_queue.emplace(exception); /// can not cancel itself or the exception might be lost - /// kill the processor so ExchangeReceiver will be closed - processor.cancel(true); + /// use cancel instead of kill to avoid too many useless error message + processor.cancel(false); } struct Handler diff --git a/dbms/src/DataStreams/tests/union_stream2.cpp b/dbms/src/DataStreams/tests/union_stream2.cpp index f939cda4e14..fb3f7238414 100644 --- a/dbms/src/DataStreams/tests/union_stream2.cpp +++ b/dbms/src/DataStreams/tests/union_stream2.cpp @@ -51,7 +51,7 @@ try for (size_t i = 0, size = streams.size(); i < size; ++i) streams[i] = std::make_shared(streams[i]); - BlockInputStreamPtr stream = std::make_shared>(streams, nullptr, settings.max_threads, /*req_id=*/""); + BlockInputStreamPtr stream = std::make_shared>(streams, BlockInputStreams{}, settings.max_threads, /*req_id=*/""); stream = std::make_shared(stream, 10, 0, ""); WriteBufferFromFileDescriptor wb(STDERR_FILENO); diff --git a/dbms/src/DataTypes/NumberTraits.h b/dbms/src/DataTypes/NumberTraits.h index 925628a8894..a8b91b88075 100644 --- a/dbms/src/DataTypes/NumberTraits.h +++ b/dbms/src/DataTypes/NumberTraits.h @@ -277,6 +277,7 @@ struct ResultOfAbs> }; /** For bitwise operations, an integer is obtained with number of bits is equal to the maximum of the arguments. + * todo: note that MySQL handles only unsigned 64-bit integer argument and result values. We should refine the code. */ template struct ResultOfBit diff --git a/dbms/src/Debug/DBGInvoker.cpp b/dbms/src/Debug/DBGInvoker.cpp index 3f633c08e67..df993d8e6e9 100644 --- a/dbms/src/Debug/DBGInvoker.cpp +++ b/dbms/src/Debug/DBGInvoker.cpp @@ -118,6 +118,10 @@ DBGInvoker::DBGInvoker() regSchemalessFunc("mapped_database", dbgFuncMappedDatabase); regSchemalessFunc("mapped_table", dbgFuncMappedTable); regSchemafulFunc("query_mapped", dbgFuncQueryMapped); + regSchemalessFunc("get_tiflash_replica_count", dbgFuncGetTiflashReplicaCount); + regSchemalessFunc("get_partition_tables_tiflash_replica_count", dbgFuncGetPartitionTablesTiflashReplicaCount); + regSchemalessFunc("get_tiflash_mode", dbgFuncGetTiflashMode); + regSchemalessFunc("get_partition_tables_tiflash_mode", dbgFuncGetPartitionTablesTiflashMode); regSchemalessFunc("search_log_for_key", dbgFuncSearchLogForKey); regSchemalessFunc("tidb_dag", dbgFuncTiDBQueryFromNaturalDag); diff --git a/dbms/src/Debug/MockSchemaGetter.h b/dbms/src/Debug/MockSchemaGetter.h index f02699866ce..11c5d97f036 100644 --- a/dbms/src/Debug/MockSchemaGetter.h +++ b/dbms/src/Debug/MockSchemaGetter.h @@ -17,16 +17,25 @@ #include #include +#include + namespace DB { - struct MockSchemaGetter { TiDB::DBInfoPtr getDatabase(DatabaseID db_id) { return MockTiDB::instance().getDBInfoByID(db_id); } Int64 getVersion() { return MockTiDB::instance().getVersion(); } - SchemaDiff getSchemaDiff(Int64 version) { return MockTiDB::instance().getSchemaDiff(version); } + std::optional getSchemaDiff(Int64 version) + { + return MockTiDB::instance().getSchemaDiff(version); + } + + bool checkSchemaDiffExists(Int64 version) + { + return MockTiDB::instance().checkSchemaDiffExists(version); + } TiDB::TableInfoPtr getTableInfo(DatabaseID, TableID table_id) { return MockTiDB::instance().getTableInfoByID(table_id); } diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 42ab56a97c1..99d9625461b 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -221,7 +221,6 @@ TiDB::TableInfoPtr MockTiDB::parseColumns( { String & name = string_tokens[index]; 
index_info.idx_cols[index].name = name; - index_info.idx_cols[index].offset = pk_column_pos_map[name]; index_info.idx_cols[index].length = -1; } } @@ -302,7 +301,7 @@ int MockTiDB::newTables( tables_by_id.emplace(table->table_info.id, table); tables_by_name.emplace(qualified_name, table); - AffectedOption opt; + AffectedOption opt{}; opt.schema_id = table->database_id; opt.table_id = table->id(); opt.old_schema_id = table->database_id; @@ -571,7 +570,7 @@ void MockTiDB::renameTables(const std::vectordatabase_id; opt.table_id = new_table->id(); opt.old_schema_id = table->database_id; @@ -669,9 +668,14 @@ std::pair MockTiDB::getDBIDByName(const String & database_name return std::make_pair(false, -1); } -SchemaDiff MockTiDB::getSchemaDiff(Int64 version_) +std::optional MockTiDB::getSchemaDiff(Int64 version_) { return version_diff[version_]; } +bool MockTiDB::checkSchemaDiffExists(Int64 version) +{ + return version_diff.find(version) != version_diff.end(); +} + } // namespace DB diff --git a/dbms/src/Debug/MockTiDB.h b/dbms/src/Debug/MockTiDB.h index 36d2af90859..261e547b13a 100644 --- a/dbms/src/Debug/MockTiDB.h +++ b/dbms/src/Debug/MockTiDB.h @@ -127,7 +127,9 @@ class MockTiDB : public ext::Singleton std::pair getDBIDByName(const String & database_name); - SchemaDiff getSchemaDiff(Int64 version); + bool checkSchemaDiffExists(Int64 version); + + std::optional getSchemaDiff(Int64 version); std::unordered_map getDatabases() { return databases; } diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index fec76d7a085..61f4474f919 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -170,6 +170,7 @@ std::unordered_map func_name_to_sig({ {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, + {"concat", tipb::ScalarFuncSig::Concat}, {"round_int", tipb::ScalarFuncSig::RoundInt}, {"round_uint", tipb::ScalarFuncSig::RoundInt}, {"round_dec", tipb::ScalarFuncSig::RoundDec}, @@ -461,6 +462,14 @@ void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr ft->set_collate(collator_id); break; } + case tipb::ScalarFuncSig::Concat: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + ft->set_collate(collator_id); + break; + } case tipb::ScalarFuncSig::RoundInt: case tipb::ScalarFuncSig::RoundWithFracInt: { @@ -851,6 +860,7 @@ bool ExchangeReceiver::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t c { tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); for (auto & field : output_schema) { @@ -1354,6 +1364,7 @@ bool Window::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id { tipb_executor->set_tp(tipb::ExecType::TypeWindow); tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); tipb::Window * window = tipb_executor->mutable_window(); auto & input_schema = children[0]->output_schema; for (const auto & expr : func_descs) @@ -1430,6 +1441,7 @@ bool Sort::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, { tipb_executor->set_tp(tipb::ExecType::TypeSort); tipb_executor->set_executor_id(name); + 
tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); tipb::Sort * sort = tipb_executor->mutable_sort(); sort->set_ispartialsort(is_partial_sort); @@ -1545,7 +1557,7 @@ ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPt ci.tp = TiDB::TypeLongLong; ci.flag = TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull; } - else if (func->name == "max" || func->name == "min" || func->name == "first_row") + else if (func->name == "max" || func->name == "min" || func->name == "first_row" || func->name == "sum") { ci = children_ci[0]; ci.flag &= ~TiDB::ColumnFlagNotNull; @@ -1629,7 +1641,6 @@ ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr se } } } - auto project = std::make_shared(executor_index, output_schema, std::move(exprs)); project->children.push_back(input); return project; @@ -1666,13 +1677,13 @@ ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, ti return exchange_sender; } -ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema) +ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count) { - ExecutorPtr exchange_receiver = std::make_shared(executor_index, schema); + ExecutorPtr exchange_receiver = std::make_shared(executor_index, schema, fine_grained_shuffle_stream_count); return exchange_receiver; } -ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame) +ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) { std::vector partition_columns; if (partition_by_expr_list != nullptr) @@ -1740,12 +1751,13 @@ ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr fun window_exprs, std::move(partition_columns), std::move(order_columns), - frame); + frame, + fine_grained_shuffle_stream_count); window->children.push_back(input); return window; } -ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort) +ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) { std::vector order_columns; if (order_by_expr_list != nullptr) @@ -1759,8 +1771,8 @@ ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order compileExpr(input->output_schema, elem->children[0]); } } - ExecutorPtr sort = std::make_shared(executor_index, input->output_schema, std::move(order_columns), is_partial_sort); + ExecutorPtr sort = std::make_shared(executor_index, input->output_schema, std::move(order_columns), is_partial_sort, fine_grained_shuffle_stream_count); sort->children.push_back(input); return sort; } -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index 4d87c0db77e..f39f4059d26 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -139,8 +139,11 @@ struct ExchangeSender : Executor struct ExchangeReceiver : Executor { TaskMetas task_metas; - ExchangeReceiver(size_t & index, const DAGSchema & output) + uint64_t fine_grained_shuffle_stream_count; + + ExchangeReceiver(size_t & index, const DAGSchema & output, 
uint64_t fine_grained_shuffle_stream_count_ = 0) : Executor(index, "exchange_receiver_" + std::to_string(index), output) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) {} void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context &) override; @@ -292,13 +295,15 @@ struct Window : Executor std::vector partition_by_exprs; std::vector order_by_exprs; MockWindowFrame frame; + uint64_t fine_grained_shuffle_stream_count; - Window(size_t & index_, const DAGSchema & output_schema_, std::vector func_descs_, std::vector partition_by_exprs_, std::vector order_by_exprs_, MockWindowFrame frame_) + Window(size_t & index_, const DAGSchema & output_schema_, std::vector func_descs_, std::vector partition_by_exprs_, std::vector order_by_exprs_, MockWindowFrame frame_, uint64_t fine_grained_shuffle_stream_count_ = 0) : Executor(index_, "window_" + std::to_string(index_), output_schema_) , func_descs(std::move(func_descs_)) , partition_by_exprs(std::move(partition_by_exprs_)) , order_by_exprs(order_by_exprs_) , frame(frame_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) { } // Currently only use Window Executor in Unit Test which don't call columnPrume. @@ -311,11 +316,13 @@ struct Sort : Executor { std::vector by_exprs; bool is_partial_sort; + uint64_t fine_grained_shuffle_stream_count; - Sort(size_t & index_, const DAGSchema & output_schema_, std::vector by_exprs_, bool is_partial_sort_) + Sort(size_t & index_, const DAGSchema & output_schema_, std::vector by_exprs_, bool is_partial_sort_, uint64_t fine_grained_shuffle_stream_count_ = 0) : Executor(index_, "sort_" + std::to_string(index_), output_schema_) , by_exprs(by_exprs_) , is_partial_sort(is_partial_sort_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) { } // Currently only use Sort Executor in Unit Test which don't call columnPrume. 
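The MockSchemaGetter/MockTiDB change earlier in this patch switches getSchemaDiff to return std::optional and adds checkSchemaDiffExists for presence checks. Below is a minimal sketch of the optional-returning shape; `SchemaDiffSketch` and `getSchemaDiffSketch` are illustrative stand-ins, since the real SchemaDiff type is defined elsewhere.

```cpp
#include <map>
#include <optional>

// SchemaDiffSketch stands in for the real SchemaDiff defined elsewhere.
struct SchemaDiffSketch
{
    long version = 0;
};

// A find-based lookup returning std::nullopt for unknown versions.
std::optional<SchemaDiffSketch> getSchemaDiffSketch(
    const std::map<long, SchemaDiffSketch> & version_diff,
    long version)
{
    auto it = version_diff.find(version);
    if (it == version_diff.end())
        return std::nullopt;
    return it->second;
}
```

Note that the patch itself indexes `version_diff[version_]` directly and pairs it with the separate `checkSchemaDiffExists`, so callers there are expected to verify existence first.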
@@ -343,11 +350,11 @@ ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr r ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); -ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema); +ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count = 0); -ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame); +ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); -ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort); +ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); } // namespace DB diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index e9335d1e2bd..62a8b7537f1 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -290,8 +290,9 @@ BlockInputStreamPtr executeQuery(Context & context, RegionID region_id, const DA tipb_exchange_receiver.encoded_task_meta_size(), 10, /*req_id=*/"", - /*executor_id=*/""); - BlockInputStreamPtr ret = std::make_shared(exchange_receiver, /*req_id=*/"", /*executor_id=*/""); + /*executor_id=*/"", + /*fine_grained_shuffle_stream_count=*/0); + BlockInputStreamPtr ret = std::make_shared(exchange_receiver, /*req_id=*/"", /*executor_id=*/"", /*stream_id*/ 0); return ret; } else diff --git a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp index df93ee1c78d..3626041f428 100644 --- a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp @@ -40,7 +40,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); const String & database_name = typeid_cast(*args[1]).name; const String & table_name = typeid_cast(*args[2]).name; auto table = MockTiDB::instance().getTableByName(database_name, table_name); @@ -49,7 +49,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar if (4 + handle_column_size * 4 != args.size()) throw Exception("Args not matched, should be: region-id1, database-name, table-name, start1, end1, start2, end2, region-id2", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id2 = (RegionID)safeGet(typeid_cast(*args[args.size() - 1]).value); + auto region_id2 = static_cast(safeGet(typeid_cast(*args[args.size() - 1]).value)); auto table_id = table->id(); TiKVKey start_key1, start_key2, end_key1, end_key2; @@ -59,9 +59,17 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar std::vector start_keys2; std::vector end_keys1; std::vector end_keys2; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + 
column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field1 = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum1 = TiDB::DatumBumpy(start_field1, column_info.tp); @@ -88,10 +96,10 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar } else { - HandleID start1 = (HandleID)safeGet(typeid_cast(*args[3]).value); - HandleID end1 = (HandleID)safeGet(typeid_cast(*args[4]).value); - HandleID start2 = (HandleID)safeGet(typeid_cast(*args[5]).value); - HandleID end2 = (HandleID)safeGet(typeid_cast(*args[6]).value); + auto start1 = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end1 = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start2 = static_cast(safeGet(typeid_cast(*args[5]).value)); + auto end2 = static_cast(safeGet(typeid_cast(*args[6]).value)); start_key1 = RecordKVFormat::genKey(table_id, start1); start_key2 = RecordKVFormat::genKey(table_id, start2); end_key1 = RecordKVFormat::genKey(table_id, end1); @@ -110,7 +118,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar request.set_cmd_type(raft_cmdpb::AdminCmdType::BatchSplit); raft_cmdpb::BatchSplitResponse * splits = response.mutable_splits(); { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id); region->set_start_key(start_key1); region->set_end_key(end_key1); @@ -118,7 +126,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar *region->mutable_region_epoch() = new_epoch; } { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id2); region->set_start_key(start_key2); region->set_end_key(end_key2); @@ -144,8 +152,8 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: source-id1, target-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID target_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto target_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -157,7 +165,7 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::PrepareMerge); - auto prepare_merge = request.mutable_prepare_merge(); + auto * prepare_merge = request.mutable_prepare_merge(); { auto min_index = region->appliedIndex(); prepare_merge->set_min_index(min_index); @@ -184,8 +192,8 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D throw Exception("Args not matched, should be: source-id1, current-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID source_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID current_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto source_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto current_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ 
-196,7 +204,7 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D { request.set_cmd_type(raft_cmdpb::AdminCmdType::CommitMerge); - auto commit_merge = request.mutable_commit_merge(); + auto * commit_merge = request.mutable_commit_merge(); { commit_merge->set_commit(source_region->appliedIndex()); *commit_merge->mutable_source() = source_region->getMetaRegion(); @@ -220,7 +228,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -231,7 +239,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::RollbackMerge); - auto rollback_merge = request.mutable_rollback_merge(); + auto * rollback_merge = request.mutable_rollback_merge(); { auto merge_state = region->getMergeState(); rollback_merge->set_commit(merge_state.commit()); diff --git a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp index 9d5b848ddea..b5d3f252d0a 100644 --- a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp @@ -68,6 +68,12 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) size_t handle_column_size = is_common_handle ? table_info.getPrimaryIndexInfo().idx_cols.size() : 1; RegionPtr region; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + if (!is_common_handle) { auto start = static_cast(safeGet(typeid_cast(*args[3]).value)); @@ -81,7 +87,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector end_keys; for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -122,9 +129,9 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector keys; // handle key for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; + auto start_field = RegionBench::convertField(column_info, fields[idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } @@ -198,9 +205,16 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args // Get start key and end key form multiple column if it is clustered_index. 
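Several hunks in this patch replace offset-based primary-index lookups (`idx_cols[i].offset`) with a map keyed by column name, presumably because the offset stored in the index info can fall out of sync with the column's actual position in `table_info.columns`. A hedged sketch of that pattern, with stand-in types (`ColumnInfoSketch` and `resolvePrimaryKeyColumn` are illustrative, not TiFlash names):

```cpp
#include <cstddef>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

// ColumnInfoSketch stands in for TiDB::ColumnInfo; only the name matters here.
struct ColumnInfoSketch
{
    std::string name;
};

// Build the name -> position map once, then resolve a primary-index column by
// name; a missing column means the metadata is inconsistent, so fail loudly.
std::size_t resolvePrimaryKeyColumn(
    const std::vector<ColumnInfoSketch> & columns,
    const std::string & idx_col_name)
{
    std::unordered_map<std::string, std::size_t> column_name_columns_index_map;
    for (std::size_t i = 0; i < columns.size(); ++i)
        column_name_columns_index_map.emplace(columns[i].name, i);

    auto it = column_name_columns_index_map.find(idx_col_name);
    if (it == column_name_columns_index_map.end())
        throw std::runtime_error("primary index column not found: " + idx_col_name);
    return it->second;
}
```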
std::vector start_keys; std::vector end_keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index b2024eac1d8..f65a18b8fd0 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -61,9 +61,15 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer { std::vector start_keys; std::vector end_keys; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); diff --git a/dbms/src/Debug/dbgFuncSchema.cpp b/dbms/src/Debug/dbgFuncSchema.cpp index c388015dc10..9ef07f16e8b 100644 --- a/dbms/src/Debug/dbgFuncSchema.cpp +++ b/dbms/src/Debug/dbgFuncSchema.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -137,4 +138,5 @@ void dbgFuncIsTombstone(Context & context, const ASTs & args, DBGInvoker::Printe output(fmt_buf.toString()); } + } // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/dbgFuncSchema.h b/dbms/src/Debug/dbgFuncSchema.h index 162bc0af46b..51ab3ad41cf 100644 --- a/dbms/src/Debug/dbgFuncSchema.h +++ b/dbms/src/Debug/dbgFuncSchema.h @@ -46,5 +46,4 @@ void dbgFuncResetSchemas(Context & context, const ASTs & args, DBGInvoker::Print // Usage: // ./storage-client.sh "DBGInvoke is_tombstone(db_name, table_name)" void dbgFuncIsTombstone(Context & context, const ASTs & args, DBGInvoker::Printer output); - } // namespace DB diff --git a/dbms/src/Debug/dbgFuncSchemaName.cpp b/dbms/src/Debug/dbgFuncSchemaName.cpp index 4c2ad86bd62..3aa7b6e3af4 100644 --- a/dbms/src/Debug/dbgFuncSchemaName.cpp +++ b/dbms/src/Debug/dbgFuncSchemaName.cpp @@ -128,4 +128,109 @@ BlockInputStreamPtr dbgFuncQueryMapped(Context & context, const ASTs & args) return executeQuery(query, context, true).in; } + +void dbgFuncGetTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.empty() || args.size() != 2) + throw Exception("Args not matched, should be: database-name[, table-name]", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, 
table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." + table_name + " is not ManageableStorage", ErrorCodes::BAD_ARGUMENTS); + + fmt_buf.append((std::to_string(managed_storage->getTableInfo().replica_info.count))); + + output(fmt_buf.toString()); +} + +void dbgFuncGetPartitionTablesTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.empty() || args.size() != 2) + throw Exception("Args not matched, should be: database-name[, table-name]", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." + table_name + " is not ManageableStorage", ErrorCodes::BAD_ARGUMENTS); + + auto table_info = managed_storage->getTableInfo(); + + if (!table_info.isLogicalPartitionTable()) + throw Exception(database_name + "." + table_name + " is not a logical partition table", ErrorCodes::BAD_ARGUMENTS); + + SchemaNameMapper name_mapper; + for (const auto & part_def : table_info.partition.definitions) + { + auto partition_table_info = table_info.producePartitionTableInfo(part_def.id, name_mapper); + auto partition_storage = context.getTMTContext().getStorages().get(partition_table_info->id); + fmt_buf.append((std::to_string(partition_storage->getTableInfo().replica_info.count))); + fmt_buf.append("/"); + } + + output(fmt_buf.toString()); +} + +void dbgFuncGetTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.empty() || args.size() != 2) + throw Exception("Args not matched, should be: database-name[, table-name]", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." + table_name + " is not ManageableStorage", ErrorCodes::BAD_ARGUMENTS); + + fmt_buf.append((TiFlashModeToString(managed_storage->getTableInfo().tiflash_mode))); + + output(fmt_buf.toString()); +} + +void dbgFuncGetPartitionTablesTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.empty() || args.size() != 2) + throw Exception("Args not matched, should be: database-name[, table-name]", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." 
+ table_name + " is not logical partition table", ErrorCodes::BAD_ARGUMENTS); + + SchemaNameMapper name_mapper; + for (const auto & part_def : table_info.partition.definitions) + { + auto paritition_table_info = table_info.producePartitionTableInfo(part_def.id, name_mapper); + auto partition_storage = context.getTMTContext().getStorages().get(paritition_table_info->id); + fmt_buf.append((TiFlashModeToString(partition_storage->getTableInfo().tiflash_mode))); + fmt_buf.append("/"); + } + + output(fmt_buf.toString()); +} + } // namespace DB diff --git a/dbms/src/Debug/dbgFuncSchemaName.h b/dbms/src/Debug/dbgFuncSchemaName.h index 8e95aaab908..ec18f89e911 100644 --- a/dbms/src/Debug/dbgFuncSchemaName.h +++ b/dbms/src/Debug/dbgFuncSchemaName.h @@ -40,4 +40,24 @@ void dbgFuncMappedTable(Context & context, const ASTs & args, DBGInvoker::Printe // ./storage-client.sh "DBGInvoke query_mapped('select * from $d.$t', database_name[, table_name])" BlockInputStreamPtr dbgFuncQueryMapped(Context & context, const ASTs & args); +// Get table's tiflash replica counts with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_tiflash_replica_count(db_name, table_name)" +void dbgFuncGetTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output); + +// Get the logical table's partition tables' tiflash replica counts with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_partition_tables_tiflash_replica_count(db_name, table_name)" +void dbgFuncGetPartitionTablesTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output); + +// Get table's tiflash mode with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_tiflash_mode(db_name, table_name)" +void dbgFuncGetTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output); + +// Get the logical table's partition tables' tiflash replica counts with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_partition_tables_tiflash_mode(db_name, table_name)" +void dbgFuncGetPartitionTablesTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output); + } // namespace DB diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 685b2563a3b..854d8a18bd5 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -310,7 +310,7 @@ void insert( // // Parse the fields in the inserted row std::vector fields; { - for (ASTs::const_iterator it = values_begin; it != values_end; ++it) + for (auto it = values_begin; it != values_end; ++it) { auto field = typeid_cast((*it).get())->value; fields.emplace_back(field); @@ -330,11 +330,18 @@ void insert( // if (table_info.is_common_handle) { std::vector keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - const auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - const auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + const auto & col_idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[col_idx]; + auto start_field = RegionBench::convertField(column_info, fields[col_idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); 
keys.emplace_back(start_datum.field()); } diff --git a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp index a1c6061948a..1609c83b029 100644 --- a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp +++ b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -41,7 +40,7 @@ extern const int NOT_IMPLEMENTED; const IColumn * getNestedCol(const IColumn * flash_col) { if (flash_col->isColumnNullable()) - return dynamic_cast(flash_col)->getNestedColumnPtr().get(); + return static_cast(flash_col)->getNestedColumnPtr().get(); else return flash_col; } @@ -75,8 +74,8 @@ bool flashDecimalColToArrowColInternal( const IColumn * nested_col = getNestedCol(flash_col_untyped); if (checkColumn>(nested_col) && checkDataType>(data_type)) { - const ColumnDecimal * flash_col = checkAndGetColumn>(nested_col); - const DataTypeDecimal * type = checkAndGetDataType>(data_type); + const auto * flash_col = checkAndGetColumn>(nested_col); + const auto * type = checkAndGetDataType>(data_type); UInt32 scale = type->getScale(); for (size_t i = start_index; i < end_index; i++) { @@ -92,8 +91,8 @@ bool flashDecimalColToArrowColInternal( std::vector digits; digits.reserve(type->getPrec()); decimalToVector(dec.value, digits, scale); - TiDBDecimal tiDecimal(scale, digits, dec.value < 0); - dag_column.append(tiDecimal); + TiDBDecimal ti_decimal(scale, digits, dec.value < 0); + dag_column.append(ti_decimal); } return true; } @@ -121,7 +120,7 @@ template bool flashIntegerColToArrowColInternal(TiDBColumn & dag_column, const IColumn * flash_col_untyped, size_t start_index, size_t end_index) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - if (const ColumnVector * flash_col = checkAndGetColumn>(nested_col)) + if (const auto * flash_col = checkAndGetColumn>(nested_col)) { constexpr bool is_unsigned = std::is_unsigned_v; for (size_t i = start_index; i < end_index; i++) @@ -135,9 +134,9 @@ bool flashIntegerColToArrowColInternal(TiDBColumn & dag_column, const IColumn * } } if constexpr (is_unsigned) - dag_column.append((UInt64)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); else - dag_column.append((Int64)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); } return true; } @@ -148,7 +147,7 @@ template void flashDoubleColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col_untyped, size_t start_index, size_t end_index) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - if (const ColumnVector * flash_col = checkAndGetColumn>(nested_col)) + if (const auto * flash_col = checkAndGetColumn>(nested_col)) { for (size_t i = start_index; i < end_index; i++) { @@ -160,7 +159,7 @@ void flashDoubleColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col continue; } } - dag_column.append((T)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); } return; } @@ -196,7 +195,7 @@ void flashDateOrDateTimeColToArrowCol( { const IColumn * nested_col = getNestedCol(flash_col_untyped); using DateFieldType = DataTypeMyTimeBase::FieldType; - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); for (size_t i = start_index; i < end_index; i++) { if constexpr (is_nullable) @@ -217,7 +216,7 @@ void flashStringColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col { const IColumn * nested_col = getNestedCol(flash_col_untyped); // 
columnFixedString is not used so do not check it - auto * flash_col = checkAndGetColumn(nested_col); + const auto * flash_col = checkAndGetColumn(nested_col); for (size_t i = start_index; i < end_index; i++) { // todo check if we can convert flash_col to DAG col directly since the internal representation is almost the same @@ -242,7 +241,7 @@ void flashBitColToArrowCol( const tipb::FieldType & field_type) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); for (size_t i = start_index; i < end_index; i++) { if constexpr (is_nullable) @@ -267,7 +266,7 @@ void flashEnumColToArrowCol( const IDataType * data_type) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); const auto * enum_type = checkAndGetDataType(data_type); size_t enum_value_size = enum_type->getValues().size(); for (size_t i = start_index; i < end_index; i++) @@ -280,10 +279,10 @@ void flashEnumColToArrowCol( continue; } } - auto enum_value = (UInt64)flash_col->getElement(i); + auto enum_value = static_cast(flash_col->getElement(i)); if (enum_value == 0 || enum_value > enum_value_size) throw TiFlashException("number of enum overflow enum boundary", Errors::Coprocessor::Internal); - TiDBEnum ti_enum(enum_value, enum_type->getNameForValue((const DataTypeEnum16::FieldType)enum_value)); + TiDBEnum ti_enum(enum_value, enum_type->getNameForValue(static_cast(enum_value))); dag_column.append(ti_enum); } } @@ -300,7 +299,7 @@ void flashColToArrowCol(TiDBColumn & dag_column, const ColumnWithTypeAndName & f throw TiFlashException("Flash column and TiDB column has different not null flag", Errors::Coprocessor::Internal); } if (type->isNullable()) - type = dynamic_cast(type)->getNestedType().get(); + type = static_cast(type)->getNestedType().get(); switch (tidb_column_info.tp) { @@ -457,7 +456,7 @@ const char * arrowEnumColToFlashCol( { if (checkNull(i, null_count, null_bitmap, col)) continue; - const auto enum_value = (Int64)toLittleEndian(*(reinterpret_cast(pos + offsets[i]))); + const auto enum_value = static_cast(toLittleEndian(*(reinterpret_cast(pos + offsets[i])))); col.column->assumeMutable()->insert(Field(enum_value)); } return pos + offsets[length]; @@ -479,11 +478,11 @@ const char * arrowBitColToFlashCol( continue; const String value = String(pos + offsets[i], pos + offsets[i + 1]); if (value.length() == 0) - col.column->assumeMutable()->insert(Field(UInt64(0))); + col.column->assumeMutable()->insert(Field(static_cast(0))); UInt64 result = 0; - for (auto & c : value) + for (const auto & c : value) { - result = (result << 8u) | (UInt8)c; + result = (result << 8u) | static_cast(c); } col.column->assumeMutable()->insert(Field(result)); } @@ -500,7 +499,7 @@ T toCHDecimal(UInt8 digits_int, UInt8 digits_frac, bool negative, const Int32 * UInt8 tailing_digit = digits_frac % DIGITS_PER_WORD; typename T::NativeType value = 0; - const int word_max = int(1e9); + const int word_max = static_cast(1e9); for (int i = 0; i < word_int; i++) { value = value * word_max + word_buf[i]; @@ -552,28 +551,28 @@ const char * arrowDecimalColToFlashCol( pos += 1; Int32 word_buf[MAX_WORD_BUF_LEN]; const DataTypePtr decimal_type - = col.type->isNullable() ? dynamic_cast(col.type.get())->getNestedType() : col.type; - for (int j = 0; j < MAX_WORD_BUF_LEN; j++) + = col.type->isNullable() ? 
static_cast(col.type.get())->getNestedType() : col.type; + for (int & j : word_buf) { - word_buf[j] = toLittleEndian(*(reinterpret_cast(pos))); + j = toLittleEndian(*(reinterpret_cast(pos))); pos += 4; } - if (auto * type32 = checkDecimal(*decimal_type)) + if (const auto * type32 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type32->getScale())); } - else if (auto * type64 = checkDecimal(*decimal_type)) + else if (const auto * type64 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type64->getScale())); } - else if (auto * type128 = checkDecimal(*decimal_type)) + else if (const auto * type128 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type128->getScale())); } - else if (auto * type256 = checkDecimal(*decimal_type)) + else if (const auto * type256 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type256->getScale())); @@ -600,13 +599,13 @@ const char * arrowDateColToFlashCol( continue; } UInt64 chunk_time = toLittleEndian(*(reinterpret_cast(pos))); - UInt16 year = (UInt16)((chunk_time & MyTimeBase::YEAR_BIT_FIELD_MASK) >> MyTimeBase::YEAR_BIT_FIELD_OFFSET); - UInt8 month = (UInt8)((chunk_time & MyTimeBase::MONTH_BIT_FIELD_MASK) >> MyTimeBase::MONTH_BIT_FIELD_OFFSET); - UInt8 day = (UInt8)((chunk_time & MyTimeBase::DAY_BIT_FIELD_MASK) >> MyTimeBase::DAY_BIT_FIELD_OFFSET); - UInt16 hour = (UInt16)((chunk_time & MyTimeBase::HOUR_BIT_FIELD_MASK) >> MyTimeBase::HOUR_BIT_FIELD_OFFSET); - UInt8 minute = (UInt8)((chunk_time & MyTimeBase::MINUTE_BIT_FIELD_MASK) >> MyTimeBase::MINUTE_BIT_FIELD_OFFSET); - UInt8 second = (UInt8)((chunk_time & MyTimeBase::SECOND_BIT_FIELD_MASK) >> MyTimeBase::SECOND_BIT_FIELD_OFFSET); - UInt32 micro_second = (UInt32)((chunk_time & MyTimeBase::MICROSECOND_BIT_FIELD_MASK) >> MyTimeBase::MICROSECOND_BIT_FIELD_OFFSET); + auto year = static_cast((chunk_time & MyTimeBase::YEAR_BIT_FIELD_MASK) >> MyTimeBase::YEAR_BIT_FIELD_OFFSET); + auto month = static_cast((chunk_time & MyTimeBase::MONTH_BIT_FIELD_MASK) >> MyTimeBase::MONTH_BIT_FIELD_OFFSET); + auto day = static_cast((chunk_time & MyTimeBase::DAY_BIT_FIELD_MASK) >> MyTimeBase::DAY_BIT_FIELD_OFFSET); + auto hour = static_cast((chunk_time & MyTimeBase::HOUR_BIT_FIELD_MASK) >> MyTimeBase::HOUR_BIT_FIELD_OFFSET); + auto minute = static_cast((chunk_time & MyTimeBase::MINUTE_BIT_FIELD_MASK) >> MyTimeBase::MINUTE_BIT_FIELD_OFFSET); + auto second = static_cast((chunk_time & MyTimeBase::SECOND_BIT_FIELD_MASK) >> MyTimeBase::SECOND_BIT_FIELD_OFFSET); + auto micro_second = static_cast((chunk_time & MyTimeBase::MICROSECOND_BIT_FIELD_MASK) >> MyTimeBase::MICROSECOND_BIT_FIELD_OFFSET); MyDateTime mt(year, month, day, hour, minute, second, micro_second); pos += field_length; col.column->assumeMutable()->insert(Field(mt.toPackedUInt())); @@ -659,7 +658,7 @@ const char * arrowNumColToFlashCol( case TiDB::TypeFloat: u32 = toLittleEndian(*(reinterpret_cast(pos))); std::memcpy(&f32, &u32, sizeof(Float32)); - col.column->assumeMutable()->insert(Field((Float64)f32)); + col.column->assumeMutable()->insert(Field(static_cast(f32))); break; case TiDB::TypeDouble: u64 = toLittleEndian(*(reinterpret_cast(pos))); diff --git 
a/dbms/src/Flash/Coprocessor/CoprocessorReader.h b/dbms/src/Flash/Coprocessor/CoprocessorReader.h index 25c07cff49c..b48fdbcd6dc 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorReader.h +++ b/dbms/src/Flash/Coprocessor/CoprocessorReader.h @@ -139,7 +139,8 @@ class CoprocessorReader return detail; } - CoprocessorReaderResult nextResult(std::queue & block_queue, const Block & header) + // stream_id is only meaningful for ExchangeReceiver. + CoprocessorReaderResult nextResult(std::queue & block_queue, const Block & header, size_t /*stream_id*/) { auto && [result, has_next] = resp_iter.next(); if (!result.error.empty()) diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index 1ef7338a589..1cf7a0d6c87 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -30,6 +30,8 @@ extern const int DIVIDED_BY_ZERO; extern const int INVALID_TIME; } // namespace ErrorCodes +const String enableFineGrainedShuffleExtraInfo = "enable fine grained shuffle"; + bool strictSqlMode(UInt64 sql_mode) { return sql_mode & TiDBSQLMode::STRICT_ALL_TABLES || sql_mode & TiDBSQLMode::STRICT_TRANS_TABLES; @@ -75,6 +77,11 @@ std::unordered_map & DAGContext::getProfileStreamsMap return profile_streams_map; } +void DAGContext::updateFinalConcurrency(size_t cur_streams_size, size_t streams_upper_limit) +{ + final_concurrency = std::min(std::max(final_concurrency, cur_streams_size), streams_upper_limit); +} + void DAGContext::initExecutorIdToJoinIdMap() { // only mpp task has join executor @@ -206,12 +213,20 @@ void DAGContext::attachBlockIO(const BlockIO & io_) io = io_; } -const std::unordered_map> & DAGContext::getMPPExchangeReceiverMap() const +ExchangeReceiverPtr DAGContext::getMPPExchangeReceiver(const String & executor_id) const { if (!isMPPTask()) throw TiFlashException("mpp_exchange_receiver_map is used in mpp only", Errors::Coprocessor::Internal); - RUNTIME_ASSERT(mpp_exchange_receiver_map != nullptr, log, "MPPTask without exchange receiver map"); - return *mpp_exchange_receiver_map; + RUNTIME_ASSERT(mpp_receiver_set != nullptr, log, "MPPTask without receiver set"); + return mpp_receiver_set->getExchangeReceiver(executor_id); +} + +void DAGContext::addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader) +{ + if (!isMPPTask()) + return; + RUNTIME_ASSERT(mpp_receiver_set != nullptr, log, "MPPTask without receiver set"); + return mpp_receiver_set->addCoprocessorReader(coprocessor_reader); } bool DAGContext::containsRegionsInfoForTable(Int64 table_id) const diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 07b65b2d8fe..7bfc67afcad 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -37,8 +37,13 @@ namespace DB class Context; class MPPTunnelSet; class ExchangeReceiver; -using ExchangeReceiverMap = std::unordered_map>; -using ExchangeReceiverMapPtr = std::shared_ptr>>; +using ExchangeReceiverPtr = std::shared_ptr; +/// key: executor_id of ExchangeReceiver nodes in dag. 
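DAGContext::updateFinalConcurrency above is a clamp: raise the current concurrency to at least the incoming stream count, but never past the upper limit. The same arithmetic as a standalone function, verbatim except for the hypothetical name `clampFinalConcurrency`:

```cpp
#include <algorithm>
#include <cstddef>

// Raise the current concurrency to at least cur_streams_size, but never let
// it exceed streams_upper_limit (mirrors DAGContext::updateFinalConcurrency).
std::size_t clampFinalConcurrency(
    std::size_t final_concurrency,
    std::size_t cur_streams_size,
    std::size_t streams_upper_limit)
{
    return std::min(std::max(final_concurrency, cur_streams_size), streams_upper_limit);
}
```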
+using ExchangeReceiverMap = std::unordered_map; +class MPPReceiverSet; +using MPPReceiverSetPtr = std::shared_ptr; +class CoprocessorReader; +using CoprocessorReaderPtr = std::shared_ptr; class Join; using JoinPtr = std::shared_ptr; @@ -111,6 +116,13 @@ constexpr UInt64 NO_ENGINE_SUBSTITUTION = 1ul << 30ul; constexpr UInt64 ALLOW_INVALID_DATES = 1ul << 32ul; } // namespace TiDBSQLMode +inline bool enableFineGrainedShuffle(uint64_t stream_count) +{ + return stream_count > 0; +} + +extern const String enableFineGrainedShuffleExtraInfo; + /// A context used to track the information that needs to be passed around during DAG planning. class DAGContext { @@ -298,17 +310,20 @@ class DAGContext return sql_mode & f; } + void updateFinalConcurrency(size_t cur_streams_size, size_t streams_upper_limit); + bool isTest() const { return is_test; } void setColumnsForTest(std::unordered_map & columns_for_test_map_) { columns_for_test_map = columns_for_test_map_; } ColumnsWithTypeAndName columnsForTest(String executor_id); bool columnsForTestEmpty() { return columns_for_test_map.empty(); } - const std::unordered_map> & getMPPExchangeReceiverMap() const; - void setMPPExchangeReceiverMap(ExchangeReceiverMapPtr & exchange_receiver_map) + ExchangeReceiverPtr getMPPExchangeReceiver(const String & executor_id) const; + void setMPPReceiverSet(const MPPReceiverSetPtr & receiver_set) { - mpp_exchange_receiver_map = exchange_receiver_map; + mpp_receiver_set = receiver_set; } + void addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader); void addSubquery(const String & subquery_id, SubqueryForSet && subquery); bool hasSubquery() const { return !subqueries.empty(); } @@ -343,6 +358,10 @@ class DAGContext std::vector output_field_types; std::vector output_offsets; + /// Hold the order of list based executors. + /// It is used to ensure that execution summaries of list based executors are emitted in the same order as the executors themselves. + std::vector list_based_executors_order; + private: void initExecutorIdToJoinIdMap(); void initOutputInfo(); @@ -350,7 +369,7 @@ class DAGContext private: /// Hold io for correcting the destruction order. BlockIO io; - /// profile_streams_map is a map that maps from executor_id to profile BlockInputStreams + /// profile_streams_map is a map that maps from executor_id to profile BlockInputStreams. std::unordered_map profile_streams_map; /// executor_id_to_join_id_map is a map that maps executor id to all the join executor id of itself and all its children. std::unordered_map> executor_id_to_join_id_map; @@ -369,8 +388,8 @@ class DAGContext ConcurrentBoundedQueue warnings; /// warning_count is the actual warning count during the entire execution std::atomic warning_count; - /// key: executor_id of ExchangeReceiver nodes in dag. - ExchangeReceiverMapPtr mpp_exchange_receiver_map; + + MPPReceiverSetPtr mpp_receiver_set; /// vector of SubqueriesForSets(such as join build subquery). /// The order of the vector is also the order of the subquery. 
std::vector subqueries; diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 55a2024a8bc..9fe388f8fe4 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -72,6 +72,7 @@ DAGDriver::DAGDriver( ::grpc::ServerWriter<::coprocessor::BatchResponse> * writer_, bool internal_) : context(context_) + , dag_response(nullptr) , writer(writer_) , internal(internal_) , log(&Poco::Logger::get("DAGDriver")) @@ -129,7 +130,7 @@ try auto streaming_writer = std::make_shared(writer); TiDB::TiDBCollators collators; - std::unique_ptr response_writer = std::make_unique>( + std::unique_ptr response_writer = std::make_unique>( streaming_writer, std::vector(), collators, @@ -137,7 +138,9 @@ try context.getSettingsRef().dag_records_per_chunk, context.getSettingsRef().batch_send_min_limit, true, - dag_context); + dag_context, + /*fine_grained_shuffle_stream_count=*/0, + /*fine_grained_shuffle_batch_size=*/0); dag_output_stream = std::make_shared(streams.in->getHeader(), std::move(response_writer)); copyData(*streams.in, *dag_output_stream); } diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index aa269469cdb..5fbd86e9762 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1130,30 +1130,40 @@ NamesWithAliases DAGExpressionAnalyzer::appendFinalProjectForRootQueryBlock( const std::vector & output_offsets, const String & column_prefix, bool keep_session_timezone_info) +{ + auto & step = initAndGetLastStep(chain); + + NamesWithAliases final_project = buildFinalProjection(step.actions, schema, output_offsets, column_prefix, keep_session_timezone_info); + + for (const auto & name : final_project) + { + step.required_output.push_back(name.first); + } + return final_project; +} + +NamesWithAliases DAGExpressionAnalyzer::buildFinalProjection( + const ExpressionActionsPtr & actions, + const std::vector & schema, + const std::vector & output_offsets, + const String & column_prefix, + bool keep_session_timezone_info) { if (unlikely(output_offsets.empty())) - throw Exception("Root Query block without output_offsets", ErrorCodes::LOGICAL_ERROR); + throw Exception("DAGRequest without output_offsets", ErrorCodes::LOGICAL_ERROR); bool need_append_timezone_cast = !keep_session_timezone_info && !context.getTimezoneInfo().is_utc_timezone; auto [need_append_type_cast, need_append_type_cast_vec] = isCastRequiredForRootFinalProjection(schema, output_offsets); assert(need_append_type_cast_vec.size() == output_offsets.size()); - auto & step = initAndGetLastStep(chain); - if (need_append_timezone_cast || need_append_type_cast) { // after appendCastForRootFinalProjection, source_columns has been modified. - appendCastForRootFinalProjection(step.actions, schema, output_offsets, need_append_timezone_cast, need_append_type_cast_vec); + appendCastForRootFinalProjection(actions, schema, output_offsets, need_append_timezone_cast, need_append_type_cast_vec); } // generate project aliases from source_columns. 
- NamesWithAliases final_project = genRootFinalProjectAliases(column_prefix, output_offsets); - - for (const auto & name : final_project) - { - step.required_output.push_back(name.first); - } - return final_project; + return genRootFinalProjectAliases(column_prefix, output_offsets); } String DAGExpressionAnalyzer::alignReturnType( diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 046088ab2b2..63d35abe26d 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -102,6 +102,8 @@ class DAGExpressionAnalyzer : private boost::noncopyable ExpressionActionsChain & chain, const String & column_prefix) const; + NamesWithAliases genNonRootFinalProjectAliases(const String & column_prefix) const; + // Generate a project action for root DAGQueryBlock, // to keep the schema of Block and tidb-schema the same. NamesWithAliases appendFinalProjectForRootQueryBlock( @@ -111,6 +113,13 @@ class DAGExpressionAnalyzer : private boost::noncopyable const String & column_prefix, bool keep_session_timezone_info); + NamesWithAliases buildFinalProjection( + const ExpressionActionsPtr & actions, + const std::vector & schema, + const std::vector & output_offsets, + const String & column_prefix, + bool keep_session_timezone_info); + String getActions( const tipb::Expr & expr, const ExpressionActionsPtr & actions, @@ -153,17 +162,38 @@ class DAGExpressionAnalyzer : private boost::noncopyable const tipb::Window & window, size_t window_columns_start_index); -#ifndef DBMS_PUBLIC_GTEST -private: -#endif NamesAndTypes buildOrderColumns( const ExpressionActionsPtr & actions, const ::google::protobuf::RepeatedPtrField & order_by); + String buildFilterColumn( + const ExpressionActionsPtr & actions, + const std::vector & conditions); + + void buildAggFuncs( + const tipb::Aggregation & aggregation, + const ExpressionActionsPtr & actions, + AggregateDescriptions & aggregate_descriptions, + NamesAndTypes & aggregated_columns); + + void buildAggGroupBy( + const google::protobuf::RepeatedPtrField & group_by, + const ExpressionActionsPtr & actions, + AggregateDescriptions & aggregate_descriptions, + NamesAndTypes & aggregated_columns, + Names & aggregation_keys, + std::unordered_set & agg_key_set, + bool group_by_collation_sensitive, + TiDB::TiDBCollators & collators); + void appendCastAfterAgg( const ExpressionActionsPtr & actions, const tipb::Aggregation & agg); +#ifndef DBMS_PUBLIC_GTEST +private: +#endif + String buildTupleFunctionForGroupConcat( const tipb::Expr & expr, SortDescription & sort_desc, @@ -187,22 +217,6 @@ class DAGExpressionAnalyzer : private boost::noncopyable NamesAndTypes & aggregated_columns, bool empty_input_as_null); - void buildAggFuncs( - const tipb::Aggregation & aggregation, - const ExpressionActionsPtr & actions, - AggregateDescriptions & aggregate_descriptions, - NamesAndTypes & aggregated_columns); - - void buildAggGroupBy( - const google::protobuf::RepeatedPtrField & group_by, - const ExpressionActionsPtr & actions, - AggregateDescriptions & aggregate_descriptions, - NamesAndTypes & aggregated_columns, - Names & aggregation_keys, - std::unordered_set & agg_key_set, - bool group_by_collation_sensitive, - TiDB::TiDBCollators & collators); - void fillArgumentDetail( const ExpressionActionsPtr & actions, const tipb::Expr & arg, @@ -275,12 +289,6 @@ class DAGExpressionAnalyzer : private boost::noncopyable const ExpressionActionsPtr & actions, const String & column_name); 
- String buildFilterColumn( - const ExpressionActionsPtr & actions, - const std::vector & conditions); - - NamesWithAliases genNonRootFinalProjectAliases(const String & column_prefix) const; - NamesWithAliases genRootFinalProjectAliases( const String & column_prefix, const std::vector & output_offsets) const; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index ee529680d28..23bbb4586b3 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -450,6 +450,7 @@ DAGExpressionAnalyzerHelper::FunctionBuilderMap DAGExpressionAnalyzerHelper::fun {"bitOr", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, {"bitXor", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, {"bitNot", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, + {"bitShiftRight", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, {"leftUTF8", DAGExpressionAnalyzerHelper::buildLeftUTF8Function}, {"date_add", DAGExpressionAnalyzerHelper::buildDateAddOrSubFunction}, {"date_sub", DAGExpressionAnalyzerHelper::buildDateAddOrSubFunction}, diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 86d6428c92a..764bf07f533 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -268,7 +267,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & size_t join_build_concurrency = settings.join_concurrent_build ? std::min(max_streams, build_pipeline.streams.size()) : 1; /// build side streams - executeExpression(build_pipeline, build_side_prepare_actions, "append join key and join filters for build side"); + executeExpression(build_pipeline, build_side_prepare_actions, log, "append join key and join filters for build side"); // add a HashJoinBuildBlockInputStream to build a shared hash table auto get_concurrency_build_index = JoinInterpreterHelper::concurrencyBuildIndexGenerator(join_build_concurrency); build_pipeline.transform([&](auto & stream) { @@ -284,7 +283,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & join_ptr->init(right_query.source->getHeader(), join_build_concurrency); /// probe side streams - executeExpression(probe_pipeline, probe_side_prepare_actions, "append join key and join filters for probe side"); + executeExpression(probe_pipeline, probe_side_prepare_actions, log, "append join key and join filters for probe side"); NamesAndTypes source_columns; for (const auto & p : probe_pipeline.firstStream()->getHeader()) source_columns.emplace_back(p.name, p.type); @@ -347,14 +346,26 @@ void DAGQueryBlockInterpreter::executeWhere(DAGPipeline & pipeline, const Expres void DAGQueryBlockInterpreter::executeWindow( DAGPipeline & pipeline, - WindowDescription & window_description) + WindowDescription & window_description, + bool enable_fine_grained_shuffle) { - executeExpression(pipeline, window_description.before_window, "before window"); + executeExpression(pipeline, window_description.before_window, log, "before window"); - /// If there are several streams, we merge them into one - executeUnion(pipeline, max_streams, log, false, "merge into one for window input"); - assert(pipeline.streams.size() == 1); - pipeline.firstStream() = std::make_shared(pipeline.firstStream(), 
window_description, log->identifier()); + if (enable_fine_grained_shuffle) + { + /// Window function can be multi-threaded when fine grained shuffle is enabled. + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, window_description, log->identifier()); + stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + }); + } + else + { + /// If there are several streams, we merge them into one. + executeUnion(pipeline, max_streams, log, false, "merge into one for window input"); + assert(pipeline.streams.size() == 1); + pipeline.firstStream() = std::make_shared(pipeline.firstStream(), window_description, log->identifier()); + } } void DAGQueryBlockInterpreter::executeAggregation( @@ -365,10 +376,7 @@ void DAGQueryBlockInterpreter::executeAggregation( AggregateDescriptions & aggregate_descriptions, bool is_final_agg) { - pipeline.transform([&](auto & stream) { - stream = std::make_shared(stream, expression_actions_ptr, log->identifier()); - stream->setExtraInfo("before aggregation"); - }); + executeExpression(pipeline, expression_actions_ptr, log, "before aggregation"); Block before_agg_header = pipeline.firstStream()->getHeader(); @@ -383,34 +391,39 @@ void DAGQueryBlockInterpreter::executeAggregation( is_final_agg); /// If there are several sources, then we perform parallel aggregation - if (pipeline.streams.size() > 1) + if (pipeline.streams.size() > 1 || pipeline.streams_with_non_joined_data.size() > 1) { const Settings & settings = context.getSettingsRef(); - BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams, log); - pipeline.firstStream() = std::make_shared( + BlockInputStreamPtr stream = std::make_shared( pipeline.streams, - stream_with_non_joined_data, + pipeline.streams_with_non_joined_data, params, context.getFileProvider(), true, max_streams, settings.aggregation_memory_efficient_merge_threads ? static_cast(settings.aggregation_memory_efficient_merge_threads) : static_cast(settings.max_threads), log->identifier()); + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); + + // should record for agg before restoring concurrency. See #3804. 
recordProfileStreams(pipeline, query_block.aggregation_name); restorePipelineConcurrency(pipeline); } else { - BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams, log); BlockInputStreams inputs; if (!pipeline.streams.empty()) inputs.push_back(pipeline.firstStream()); - else - pipeline.streams.resize(1); - if (stream_with_non_joined_data) - inputs.push_back(stream_with_non_joined_data); + + if (!pipeline.streams_with_non_joined_data.empty()) + inputs.push_back(pipeline.streams_with_non_joined_data.at(0)); + + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::make_shared( std::make_shared(inputs, log->identifier()), params, @@ -421,56 +434,15 @@ void DAGQueryBlockInterpreter::executeAggregation( } } -void DAGQueryBlockInterpreter::executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, const String & extra_info) -{ - if (!expressionActionsPtr->getActions().empty()) - { - pipeline.transform([&](auto & stream) { - stream = std::make_shared(stream, expressionActionsPtr, log->identifier()); - stream->setExtraInfo(extra_info); - }); - } -} - -void DAGQueryBlockInterpreter::executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc) +void DAGQueryBlockInterpreter::executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle) { - orderStreams(pipeline, sort_desc, 0); + orderStreams(pipeline, max_streams, sort_desc, 0, enable_fine_grained_shuffle, context, log); } void DAGQueryBlockInterpreter::executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns) { Int64 limit = query_block.limit_or_topn->topn().limit(); - orderStreams(pipeline, getSortDescription(order_columns, query_block.limit_or_topn->topn().order_by()), limit); -} - -void DAGQueryBlockInterpreter::orderStreams(DAGPipeline & pipeline, SortDescription order_descr, Int64 limit) -{ - const Settings & settings = context.getSettingsRef(); - - pipeline.transform([&](auto & stream) { - auto sorting_stream = std::make_shared(stream, order_descr, log->identifier(), limit); - - /// Limits on sorting - IProfilingBlockInputStream::LocalLimits limits; - limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; - limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode); - sorting_stream->setLimits(limits); - - stream = sorting_stream; - }); - - /// If there are several streams, we merge them into one - executeUnion(pipeline, max_streams, log, false, "for partial order"); - - /// Merge the sorted blocks. 
- pipeline.firstStream() = std::make_shared( - pipeline.firstStream(), - order_descr, - settings.max_block_size, - limit, - settings.max_bytes_before_external_sort, - context.getTemporaryPath(), - log->identifier()); + orderStreams(pipeline, max_streams, getSortDescription(order_columns, query_block.limit_or_topn->topn().order_by()), limit, false, context, log); } void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, const String & key) @@ -481,17 +453,30 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) { - auto it = dagContext().getMPPExchangeReceiverMap().find(query_block.source_name); - if (unlikely(it == dagContext().getMPPExchangeReceiverMap().end())) + auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); + if (unlikely(exchange_receiver == nullptr)) throw Exception("Can not find exchange receiver for " + query_block.source_name, ErrorCodes::LOGICAL_ERROR); // todo choose a more reasonable stream number auto & exchange_receiver_io_input_streams = dagContext().getInBoundIOInputStreamsMap()[query_block.source_name]; - for (size_t i = 0; i < max_streams; ++i) + + const bool enable_fine_grained_shuffle = enableFineGrainedShuffle(exchange_receiver->getFineGrainedShuffleStreamCount()); + String extra_info = "squashing after exchange receiver"; + size_t stream_count = max_streams; + if (enable_fine_grained_shuffle) + { + extra_info += ", " + enableFineGrainedShuffleExtraInfo; + stream_count = std::min(max_streams, exchange_receiver->getFineGrainedShuffleStreamCount()); + } + + for (size_t i = 0; i < stream_count; ++i) { - BlockInputStreamPtr stream = std::make_shared(it->second, log->identifier(), query_block.source_name); + BlockInputStreamPtr stream = std::make_shared(exchange_receiver, + log->identifier(), + query_block.source_name, + /*stream_id=*/enable_fine_grained_shuffle ? 
i : 0); exchange_receiver_io_input_streams.push_back(stream); stream = std::make_shared(stream, 8192, 0, log->identifier()); - stream->setExtraInfo("squashing after exchange receiver"); + stream->setExtraInfo(extra_info); pipeline.streams.push_back(stream); } NamesAndTypes source_columns; @@ -548,15 +533,12 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti output_columns.emplace_back(alias, col.type); project_cols.emplace_back(col.name, alias); } - pipeline.transform([&](auto & stream) { - stream = std::make_shared(stream, chain.getLastActions(), log->identifier()); - stream->setExtraInfo("before projection"); - }); + executeExpression(pipeline, chain.getLastActions(), log, "before projection"); executeProject(pipeline, project_cols, "projection"); analyzer = std::make_unique(std::move(output_columns), context); } -void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb::Window & window) +void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb::Window & window, bool enable_fine_grained_shuffle) { NamesAndTypes input_columns; assert(input_streams_vec.size() == 1); @@ -565,13 +547,13 @@ void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb:: input_columns.emplace_back(p.name, p.type); DAGExpressionAnalyzer dag_analyzer(input_columns, context); WindowDescription window_description = dag_analyzer.buildWindowDescription(window); - executeWindow(pipeline, window_description); - executeExpression(pipeline, window_description.after_window, "cast after window"); + executeWindow(pipeline, window_description, enable_fine_grained_shuffle); + executeExpression(pipeline, window_description.after_window, log, "cast after window"); analyzer = std::make_unique(window_description.after_window_columns, context); } -void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort) +void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort, bool enable_fine_grained_shuffle) { NamesAndTypes input_columns; assert(input_streams_vec.size() == 1); @@ -580,7 +562,7 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t input_columns.emplace_back(p.name, p.type); DAGExpressionAnalyzer dag_analyzer(input_columns, context); auto order_columns = dag_analyzer.buildWindowOrderColumns(window_sort); - executeWindowOrder(pipeline, getSortDescription(order_columns, window_sort.byitems())); + executeWindowOrder(pipeline, getSortDescription(order_columns, window_sort.byitems()), enable_fine_grained_shuffle); analyzer = std::make_unique(std::move(input_columns), context); } @@ -628,13 +610,13 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) } else if (query_block.source->tp() == tipb::ExecType::TypeWindow) { - handleWindow(pipeline, query_block.source->window()); + handleWindow(pipeline, query_block.source->window(), enableFineGrainedShuffle(query_block.source->fine_grained_shuffle_stream_count())); recordProfileStreams(pipeline, query_block.source_name); restorePipelineConcurrency(pipeline); } else if (query_block.source->tp() == tipb::ExecType::TypeSort) { - handleWindowOrder(pipeline, query_block.source->sort()); + handleWindowOrder(pipeline, query_block.source->sort(), enableFineGrainedShuffle(query_block.source->fine_grained_shuffle_stream_count())); recordProfileStreams(pipeline, query_block.source_name); } else @@ -678,7 +660,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & 
pipeline) } if (res.before_order_and_select) { - executeExpression(pipeline, res.before_order_and_select, "before order and select"); + executeExpression(pipeline, res.before_order_and_select, log, "before order and select"); } if (!res.order_columns.empty()) @@ -714,10 +696,7 @@ void DAGQueryBlockInterpreter::executeProject(DAGPipeline & pipeline, NamesWithA if (project_cols.empty()) return; ExpressionActionsPtr project = generateProjectExpressionActions(pipeline.firstStream(), context, project_cols); - pipeline.transform([&](auto & stream) { - stream = std::make_shared<ExpressionBlockInputStream>(stream, project, log->identifier()); - stream->setExtraInfo(extra_info); - }); + executeExpression(pipeline, project, log, extra_info); } void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) @@ -743,19 +722,47 @@ void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline) std::vector<Int64> partition_col_ids = ExchangeSenderInterpreterHelper::genPartitionColIds(exchange_sender); TiDB::TiDBCollators partition_col_collators = ExchangeSenderInterpreterHelper::genPartitionColCollators(exchange_sender); int stream_id = 0; - pipeline.transform([&](auto & stream) { - // construct writer - std::unique_ptr<DAGResponseWriter> response_writer = std::make_unique<StreamingDAGResponseWriter<MPPTunnelSetPtr>>( - context.getDAGContext()->tunnel_set, - partition_col_ids, - partition_col_collators, - exchange_sender.tp(), - context.getSettingsRef().dag_records_per_chunk, - context.getSettingsRef().batch_send_min_limit, - stream_id++ == 0, /// only one stream needs to sending execution summaries for the last response - dagContext()); - stream = std::make_shared<ExchangeSenderBlockInputStream>(stream, std::move(response_writer), log->identifier()); - }); + const uint64_t stream_count = query_block.exchange_sender->fine_grained_shuffle_stream_count(); + const uint64_t batch_size = query_block.exchange_sender->fine_grained_shuffle_batch_size(); + + if (enableFineGrainedShuffle(stream_count)) + { + pipeline.transform([&](auto & stream) { + // construct writer + std::unique_ptr<DAGResponseWriter> response_writer = std::make_unique<StreamingDAGResponseWriter<MPPTunnelSetPtr, /*enable_fine_grained_shuffle=*/true>>( + context.getDAGContext()->tunnel_set, + partition_col_ids, + partition_col_collators, + exchange_sender.tp(), + context.getSettingsRef().dag_records_per_chunk, + context.getSettingsRef().batch_send_min_limit, + stream_id++ == 0, /// only one stream needs to send execution summaries for the last response + dagContext(), + stream_count, + batch_size); + stream = std::make_shared<ExchangeSenderBlockInputStream>(stream, std::move(response_writer), log->identifier()); + stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + }); + RUNTIME_CHECK(exchange_sender.tp() == tipb::ExchangeType::Hash, Exception, "exchange_sender has to be hash partition when fine grained shuffle is enabled"); + RUNTIME_CHECK(stream_count <= 1024, Exception, "fine_grained_shuffle_stream_count should not be greater than 1024"); + } + else + { + pipeline.transform([&](auto & stream) { + std::unique_ptr<DAGResponseWriter> response_writer = std::make_unique<StreamingDAGResponseWriter<MPPTunnelSetPtr, /*enable_fine_grained_shuffle=*/false>>( + context.getDAGContext()->tunnel_set, + partition_col_ids, + partition_col_collators, + exchange_sender.tp(), + context.getSettingsRef().dag_records_per_chunk, + context.getSettingsRef().batch_send_min_limit, + stream_id++ == 0, /// only one stream needs to send execution summaries for the last response + dagContext(), + stream_count, + batch_size); + stream = std::make_shared<ExchangeSenderBlockInputStream>(stream, std::move(response_writer), log->identifier()); + }); + } } void DAGQueryBlockInterpreter::handleMockExchangeSender(DAGPipeline & pipeline) @@ -783,4 +790,4 @@ BlockInputStreams DAGQueryBlockInterpreter::execute() return pipeline.streams; } -}
// namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index e68c4f91cee..c449b37e360 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -64,17 +64,16 @@ class DAGQueryBlockInterpreter void handleExchangeReceiver(DAGPipeline & pipeline); void handleMockExchangeReceiver(DAGPipeline & pipeline); void handleProjection(DAGPipeline & pipeline, const tipb::Projection & projection); - void handleWindow(DAGPipeline & pipeline, const tipb::Window & window); - void handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort); + void handleWindow(DAGPipeline & pipeline, const tipb::Window & window, bool enable_fine_grained_shuffle); + void handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort, bool enable_fine_grained_shuffle); void executeWhere(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column, const String & extra_info = ""); - void executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, const String & extra_info = ""); - void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc); - void orderStreams(DAGPipeline & pipeline, SortDescription order_descr, Int64 limit); + void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle); void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns); void executeLimit(DAGPipeline & pipeline); void executeWindow( DAGPipeline & pipeline, - WindowDescription & window_description); + WindowDescription & window_description, + bool enable_fine_grained_shuffle); void executeAggregation( DAGPipeline & pipeline, const ExpressionActionsPtr & expression_actions_ptr, diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index 882699e1599..d68a7b17aaa 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -20,6 +20,26 @@ namespace DB { +namespace +{ +void fillOrderForListBasedExecutors(DAGContext & dag_context, const DAGQueryBlock & query_block) +{ + assert(query_block.source); + auto & list_based_executors_order = dag_context.list_based_executors_order; + list_based_executors_order.push_back(query_block.source_name); + if (query_block.selection) + list_based_executors_order.push_back(query_block.selection_name); + if (query_block.aggregation) + list_based_executors_order.push_back(query_block.aggregation_name); + if (query_block.having) + list_based_executors_order.push_back(query_block.having_name); + if (query_block.limit_or_topn) + list_based_executors_order.push_back(query_block.limit_or_topn_name); + if (query_block.exchange_sender) + dag_context.list_based_executors_order.push_back(query_block.exchange_sender_name); +} +} // namespace + DAGQuerySource::DAGQuerySource(Context & context_) : context(context_) { @@ -32,6 +52,9 @@ DAGQuerySource::DAGQuerySource(Context & context_) else { root_query_block = std::make_shared(1, dag_request.executors()); + auto & dag_context = getDAGContext(); + if (!dag_context.return_executor_id) + fillOrderForListBasedExecutors(dag_context, *root_query_block); } } diff --git a/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp b/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp index 53bebc91da8..33f6d99f9d8 100644 --- 
a/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp @@ -89,12 +89,10 @@ void DAGResponseWriter::addExecuteSummaries(tipb::SelectResponse & response, boo } } - /// add execution_summary for local executor - for (auto & p : dag_context.getProfileStreamsMap()) - { + auto fill_execution_summary = [&](const String & executor_id, const BlockInputStreams & streams) { ExecutionSummary current; /// part 1: local execution info - for (auto & stream_ptr : p.second) + for (const auto & stream_ptr : streams) { if (auto * p_stream = dynamic_cast(stream_ptr.get())) { @@ -105,16 +103,16 @@ void DAGResponseWriter::addExecuteSummaries(tipb::SelectResponse & response, boo current.concurrency++; } /// part 2: remote execution info - if (merged_remote_execution_summaries.find(p.first) != merged_remote_execution_summaries.end()) + if (merged_remote_execution_summaries.find(executor_id) != merged_remote_execution_summaries.end()) { - for (auto & remote : merged_remote_execution_summaries[p.first]) + for (auto & remote : merged_remote_execution_summaries[executor_id]) current.merge(remote, false); } /// part 3: for join need to add the build time /// In TiFlash, a hash join's build side is finished before probe side starts, /// so the join probe side's running time does not include hash table's build time, /// when construct ExecSummaries, we need add the build cost to probe executor - auto all_join_id_it = dag_context.getExecutorIdToJoinIdMap().find(p.first); + auto all_join_id_it = dag_context.getExecutorIdToJoinIdMap().find(executor_id); if (all_join_id_it != dag_context.getExecutorIdToJoinIdMap().end()) { for (const auto & join_executor_id : all_join_id_it->second) @@ -138,8 +136,27 @@ void DAGResponseWriter::addExecuteSummaries(tipb::SelectResponse & response, boo } current.time_processed_ns += dag_context.compile_time_ns; - fillTiExecutionSummary(response.add_execution_summaries(), current, p.first, delta_mode); + fillTiExecutionSummary(response.add_execution_summaries(), current, executor_id, delta_mode); + }; + + /// add execution_summary for local executor + if (dag_context.return_executor_id) + { + for (auto & p : dag_context.getProfileStreamsMap()) + fill_execution_summary(p.first, p.second); + } + else + { + const auto & profile_streams_map = dag_context.getProfileStreamsMap(); + assert(profile_streams_map.size() == dag_context.list_based_executors_order.size()); + for (const auto & executor_id : dag_context.list_based_executors_order) + { + auto it = profile_streams_map.find(executor_id); + assert(it != profile_streams_map.end()); + fill_execution_summary(executor_id, it->second); + } } + for (auto & p : merged_remote_execution_summaries) { if (local_executors.find(p.first) == local_executors.end()) diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index 14cddd94730..390ce7b9948 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -486,7 +486,8 @@ void DAGStorageInterpreter::buildRemoteStreams(std::vector && rem std::vector tasks(all_tasks.begin() + task_start, all_tasks.begin() + task_end); auto coprocessor_reader = std::make_shared(schema, cluster, tasks, has_enforce_encode_type, 1); - BlockInputStreamPtr input = std::make_shared(coprocessor_reader, log->identifier(), table_scan.getTableScanExecutorID()); + context.getDAGContext()->addCoprocessorReader(coprocessor_reader); + BlockInputStreamPtr input = 
std::make_shared(coprocessor_reader, log->identifier(), table_scan.getTableScanExecutorID(), /*stream_id=*/0); pipeline.streams.push_back(input); task_start = task_end; } diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 9ffa29cd14d..2003103a20a 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -332,7 +332,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::DecimalIsFalseWithNull, "isFalseWithNull"}, //{tipb::ScalarFuncSig::LeftShift, "cast"}, - //{tipb::ScalarFuncSig::RightShift, "cast"}, + {tipb::ScalarFuncSig::RightShift, "bitShiftRight"}, //{tipb::ScalarFuncSig::BitCount, "cast"}, //{tipb::ScalarFuncSig::GetParamString, "cast"}, @@ -513,7 +513,7 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::YearWeekWithMode, "cast"}, //{tipb::ScalarFuncSig::YearWeekWithoutMode, "cast"}, - //{tipb::ScalarFuncSig::GetFormat, "cast"}, + {tipb::ScalarFuncSig::GetFormat, "getFormat"}, {tipb::ScalarFuncSig::SysDateWithFsp, "sysDateWithFsp"}, {tipb::ScalarFuncSig::SysDateWithoutFsp, "sysDateWithoutFsp"}, //{tipb::ScalarFuncSig::CurrentDate, "cast"}, @@ -561,7 +561,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::Quarter, "toQuarter"}, //{tipb::ScalarFuncSig::SecToTime, "cast"}, - //{tipb::ScalarFuncSig::TimeToSec, "cast"}, + {tipb::ScalarFuncSig::TimeToSec, "tidbTimeToSec"}, //{tipb::ScalarFuncSig::TimestampAdd, "cast"}, {tipb::ScalarFuncSig::ToDays, "tidbToDays"}, {tipb::ScalarFuncSig::ToSeconds, "tidbToSeconds"}, @@ -648,8 +648,8 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::Quote, "cast"}, //{tipb::ScalarFuncSig::Repeat, "cast"}, {tipb::ScalarFuncSig::Replace, "replaceAll"}, - //{tipb::ScalarFuncSig::ReverseUTF8, "cast"}, - //{tipb::ScalarFuncSig::Reverse, "cast"}, + {tipb::ScalarFuncSig::ReverseUTF8, "reverseUTF8"}, + {tipb::ScalarFuncSig::Reverse, "reverse"}, {tipb::ScalarFuncSig::RightUTF8, "rightUTF8"}, //{tipb::ScalarFuncSig::Right, "cast"}, {tipb::ScalarFuncSig::RpadUTF8, "rpadUTF8"}, diff --git a/dbms/src/Flash/Coprocessor/DecodeDetail.h b/dbms/src/Flash/Coprocessor/DecodeDetail.h index 9bad0ca2b72..91851650d9e 100644 --- a/dbms/src/Flash/Coprocessor/DecodeDetail.h +++ b/dbms/src/Flash/Coprocessor/DecodeDetail.h @@ -21,8 +21,12 @@ namespace DB /// Detail of the packet that decoding in TiRemoteInputStream.RemoteReader.decodeChunks() struct DecodeDetail { + // For fine grained shuffle, each ExchangeReceiver/thread will decode its own blocks. + // So this is the row number of partial blocks of the original packet. + // This will be the row number of all blocks of the original packet if it's not fine grained shuffle. Int64 rows = 0; - // byte size of origin packet. + + // Total byte size of the origin packet, even for fine grained shuffle. 
Int64 packet_bytes = 0; }; -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp index be3475f714f..efb8a08f1d8 100644 --- a/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp +++ b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp @@ -54,4 +54,15 @@ ColumnsWithTypeAndName getColumnWithTypeAndName(const NamesAndTypes & names_and_ } return column_with_type_and_names; } -} // namespace DB \ No newline at end of file + +NamesAndTypes toNamesAndTypes(const DAGSchema & dag_schema) +{ + NamesAndTypes names_and_types; + for (const auto & col : dag_schema) + { + auto tp = getDataTypeByColumnInfoForComputingLayer(col.second); + names_and_types.emplace_back(col.first, tp); + } + return names_and_types; +} +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h index 617f69de925..96f202d800e 100644 --- a/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h +++ b/dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -23,4 +24,5 @@ namespace DB { NamesAndTypes genNamesAndTypes(const TiDBTableScan & table_scan); ColumnsWithTypeAndName getColumnWithTypeAndName(const NamesAndTypes & names_and_types); -} // namespace DB \ No newline at end of file +NamesAndTypes toNamesAndTypes(const DAGSchema & dag_schema); +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index c9810454218..002a06d07b9 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -12,8 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include +#include #include #include +#include #include #include @@ -39,32 +43,6 @@ void restoreConcurrency( } } -BlockInputStreamPtr combinedNonJoinedDataStream( - DAGPipeline & pipeline, - size_t max_threads, - const LoggerPtr & log, - bool ignore_block) -{ - BlockInputStreamPtr ret = nullptr; - if (pipeline.streams_with_non_joined_data.size() == 1) - ret = pipeline.streams_with_non_joined_data.at(0); - else if (pipeline.streams_with_non_joined_data.size() > 1) - { - if (ignore_block) - { - ret = std::make_shared(pipeline.streams_with_non_joined_data, nullptr, max_threads, log->identifier()); - ret->setExtraInfo("combine non joined(ignore block)"); - } - else - { - ret = std::make_shared(pipeline.streams_with_non_joined_data, nullptr, max_threads, log->identifier()); - ret->setExtraInfo("combine non joined"); - } - } - pipeline.streams_with_non_joined_data.clear(); - return ret; -} - void executeUnion( DAGPipeline & pipeline, size_t max_streams, @@ -72,21 +50,33 @@ void executeUnion( bool ignore_block, const String & extra_info) { - if (pipeline.streams.size() == 1 && pipeline.streams_with_non_joined_data.empty()) - return; - auto non_joined_data_stream = combinedNonJoinedDataStream(pipeline, max_streams, log, ignore_block); - if (!pipeline.streams.empty()) + switch (pipeline.streams.size() + pipeline.streams_with_non_joined_data.size()) + { + case 0: + break; + case 1: { + if (pipeline.streams.size() == 1) + break; + // streams_with_non_joined_data's size is 1. 
+ pipeline.streams.push_back(pipeline.streams_with_non_joined_data.at(0)); + pipeline.streams_with_non_joined_data.clear(); + break; + } + default: + { + BlockInputStreamPtr stream; if (ignore_block) - pipeline.firstStream() = std::make_shared(pipeline.streams, non_joined_data_stream, max_streams, log->identifier()); + stream = std::make_shared(pipeline.streams, pipeline.streams_with_non_joined_data, max_streams, log->identifier()); else - pipeline.firstStream() = std::make_shared(pipeline.streams, non_joined_data_stream, max_streams, log->identifier()); - pipeline.firstStream()->setExtraInfo(extra_info); + stream = std::make_shared(pipeline.streams, pipeline.streams_with_non_joined_data, max_streams, log->identifier()); + stream->setExtraInfo(extra_info); + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); + break; } - else if (non_joined_data_stream != nullptr) - { - pipeline.streams.push_back(non_joined_data_stream); } } @@ -102,4 +92,77 @@ ExpressionActionsPtr generateProjectExpressionActions( project->add(ExpressionAction::project(project_cols)); return project; } + +void executeExpression( + DAGPipeline & pipeline, + const ExpressionActionsPtr & expr_actions, + const LoggerPtr & log, + const String & extra_info) +{ + if (expr_actions && !expr_actions->getActions().empty()) + { + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, expr_actions, log->identifier()); + stream->setExtraInfo(extra_info); + }); + } +} + +void orderStreams( + DAGPipeline & pipeline, + size_t max_streams, + SortDescription order_descr, + Int64 limit, + bool enable_fine_grained_shuffle, + const Context & context, + const LoggerPtr & log) +{ + const Settings & settings = context.getSettingsRef(); + String extra_info; + if (enable_fine_grained_shuffle) + extra_info = enableFineGrainedShuffleExtraInfo; + + pipeline.transform([&](auto & stream) { + auto sorting_stream = std::make_shared(stream, order_descr, log->identifier(), limit); + + /// Limits on sorting + IProfilingBlockInputStream::LocalLimits limits; + limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; + limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode); + sorting_stream->setLimits(limits); + + stream = sorting_stream; + stream->setExtraInfo(extra_info); + }); + + if (enable_fine_grained_shuffle) + { + pipeline.transform([&](auto & stream) { + stream = std::make_shared( + stream, + order_descr, + settings.max_block_size, + limit, + settings.max_bytes_before_external_sort, + context.getTemporaryPath(), + log->identifier()); + stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + }); + } + else + { + /// If there are several streams, we merge them into one + executeUnion(pipeline, max_streams, log, false, "for partial order"); + + /// Merge the sorted blocks. 
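To see why the fine grained branch of `orderStreams` may sort each stream independently while the branch below must first union and then merge: with fine grained shuffle, each stream already holds a disjoint subset of the partition keys (that is the invariant fine grained shuffle provides), and window functions only need order within a partition. A toy model of the two topologies, using plain `std::vector<int>` in place of blocks (an assumption for illustration, not the real API):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

using Stream = std::vector<int>;

// Toy model: with fine grained shuffle, sorting per stream is enough because
// streams hold disjoint key subsets; otherwise all streams must be merged into
// one totally ordered stream first.
std::vector<Stream> sortStreams(std::vector<Stream> streams, bool fine_grained)
{
    if (fine_grained)
    {
        for (auto & s : streams) // one partial-sort + merge-sort chain per stream in the real code
            std::sort(s.begin(), s.end());
        return streams;
    }
    Stream merged; // executeUnion + a single MergeSorting step in the real code
    for (const auto & s : streams)
        merged.insert(merged.end(), s.begin(), s.end());
    std::sort(merged.begin(), merged.end());
    return {merged};
}

int main()
{
    auto out = sortStreams({{3, 1}, {4, 2}}, /*fine_grained=*/true);
    assert(out.size() == 2 && out[0] == Stream({1, 3}));
    out = sortStreams({{3, 1}, {4, 2}}, /*fine_grained=*/false);
    assert(out.size() == 1 && out[0] == Stream({1, 2, 3, 4}));
}
```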
+ pipeline.firstStream() = std::make_shared( + pipeline.firstStream(), + order_descr, + settings.max_block_size, + limit, + settings.max_bytes_before_external_sort, + context.getTemporaryPath(), + log->identifier()); + } +} } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.h b/dbms/src/Flash/Coprocessor/InterpreterUtils.h index 5c4d4721d5e..bd64346718c 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.h +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -44,4 +45,19 @@ ExpressionActionsPtr generateProjectExpressionActions( const BlockInputStreamPtr & stream, const Context & context, const NamesWithAliases & project_cols); + +void executeExpression( + DAGPipeline & pipeline, + const ExpressionActionsPtr & expr_actions, + const LoggerPtr & log, + const String & extra_info = ""); + +void orderStreams( + DAGPipeline & pipeline, + size_t max_streams, + SortDescription order_descr, + Int64 limit, + bool enable_fine_grained_shuffle, + const Context & context, + const LoggerPtr & log); } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp index f915653fe96..a72dfcc16ef 100644 --- a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp +++ b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { namespace ErrorCodes @@ -37,8 +39,8 @@ inline void serializeToPacket(mpp::MPPDataPacket & packet, const tipb::SelectRes throw Exception(fmt::format("Fail to serialize response, response size: {}", response.ByteSizeLong())); } -template -StreamingDAGResponseWriter::StreamingDAGResponseWriter( +template +StreamingDAGResponseWriter::StreamingDAGResponseWriter( StreamWriterPtr writer_, std::vector partition_col_ids_, TiDB::TiDBCollators collators_, @@ -46,7 +48,9 @@ StreamingDAGResponseWriter::StreamingDAGResponseWriter( Int64 records_per_chunk_, Int64 batch_send_min_limit_, bool should_send_exec_summary_at_last_, - DAGContext & dag_context_) + DAGContext & dag_context_, + uint64_t fine_grained_shuffle_stream_count_, + UInt64 fine_grained_shuffle_batch_size_) : DAGResponseWriter(records_per_chunk_, dag_context_) , batch_send_min_limit(batch_send_min_limit_) , should_send_exec_summary_at_last(should_send_exec_summary_at_last_) @@ -54,6 +58,8 @@ StreamingDAGResponseWriter::StreamingDAGResponseWriter( , writer(writer_) , partition_col_ids(std::move(partition_col_ids_)) , collators(std::move(collators_)) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + , fine_grained_shuffle_batch_size(fine_grained_shuffle_batch_size_) { rows_in_blocks = 0; partition_num = writer_->getPartitionNum(); @@ -71,17 +77,37 @@ StreamingDAGResponseWriter::StreamingDAGResponseWriter( } } -template -void StreamingDAGResponseWriter::finishWrite() +template +void StreamingDAGResponseWriter::finishWrite() { if (should_send_exec_summary_at_last) - batchWrite(); + { + if constexpr (enable_fine_grained_shuffle) + { + assert(exchange_type == tipb::ExchangeType::Hash); + batchWriteFineGrainedShuffle(); + } + else + { + batchWrite(); + } + } else - batchWrite(); + { + if constexpr (enable_fine_grained_shuffle) + { + assert(exchange_type == tipb::ExchangeType::Hash); + batchWriteFineGrainedShuffle(); + } + else + { + batchWrite(); + } + } } -template -void StreamingDAGResponseWriter::write(const Block & block) +template +void 
StreamingDAGResponseWriter::write(const Block & block) { if (block.columns() != dag_context.result_field_types.size()) throw TiFlashException("Output column size mismatch with field type size", Errors::Coprocessor::Internal); @@ -91,15 +117,23 @@ void StreamingDAGResponseWriter::write(const Block & block) { blocks.push_back(block); } - if (static_cast(rows_in_blocks) > (dag_context.encode_type == tipb::EncodeType::TypeCHBlock ? batch_send_min_limit : records_per_chunk - 1)) + + if constexpr (enable_fine_grained_shuffle) { - batchWrite(); + assert(exchange_type == tipb::ExchangeType::Hash); + if (static_cast(rows_in_blocks) >= fine_grained_shuffle_batch_size) + batchWriteFineGrainedShuffle(); + } + else + { + if (static_cast(rows_in_blocks) > (dag_context.encode_type == tipb::EncodeType::TypeCHBlock ? batch_send_min_limit : records_per_chunk - 1)) + batchWrite(); } } -template +template template -void StreamingDAGResponseWriter::encodeThenWriteBlocks( +void StreamingDAGResponseWriter::encodeThenWriteBlocks( const std::vector & input_blocks, tipb::SelectResponse & response) const { @@ -191,133 +225,238 @@ void StreamingDAGResponseWriter::encodeThenWriteBlocks( } } -/// hash exchanging data among only TiFlash nodes. -template + +template template -void StreamingDAGResponseWriter::partitionAndEncodeThenWriteBlocks( - std::vector & input_blocks, - tipb::SelectResponse & response) const +void StreamingDAGResponseWriter::batchWrite() { - std::vector packet(partition_num); - - std::vector responses_row_count(partition_num); + tipb::SelectResponse response; + if constexpr (send_exec_summary_at_last) + addExecuteSummaries(response, !dag_context.isMPPTask() || dag_context.isRootMPPTask()); + if (exchange_type == tipb::ExchangeType::Hash) + { + partitionAndEncodeThenWriteBlocks(blocks, response); + } + else + { + encodeThenWriteBlocks(blocks, response); + } + blocks.clear(); + rows_in_blocks = 0; +} +template +template +void StreamingDAGResponseWriter::handleExecSummary( + const std::vector & input_blocks, + std::vector & packet, + tipb::SelectResponse & response) const +{ if constexpr (send_exec_summary_at_last) { /// Sending the response to only one node, default the first one. serializeToPacket(packet[0], response); - } - if (input_blocks.empty()) - { - if constexpr (send_exec_summary_at_last) + // No need to send data when blocks are not empty, + // because exec_summary will be sent together with blocks. 
+ if (input_blocks.empty()) { for (auto part_id = 0; part_id < partition_num; ++part_id) { writer->write(packet[part_id], part_id); } } - return; } +} - // partition tuples in blocks - // 1) compute partition id - // 2) partition each row - // 3) encode each chunk and send it - std::vector partition_key_containers(collators.size()); - for (auto & block : input_blocks) +template +template +void StreamingDAGResponseWriter::writePackets(const std::vector & responses_row_count, + std::vector & packets) const +{ + for (size_t part_id = 0; part_id < packets.size(); ++part_id) { - std::vector dest_blocks(partition_num); - std::vector dest_tbl_cols(partition_num); - - for (size_t i = 0; i < block.columns(); ++i) + if constexpr (send_exec_summary_at_last) { - if (ColumnPtr converted = block.getByPosition(i).column->convertToFullColumnIfConst()) - { - block.getByPosition(i).column = converted; - } + writer->write(packets[part_id], part_id); } - - for (auto i = 0; i < partition_num; ++i) + else { - dest_tbl_cols[i] = block.cloneEmptyColumns(); - dest_blocks[i] = block.cloneEmpty(); + if (responses_row_count[part_id] > 0) + writer->write(packets[part_id], part_id); } + } +} - size_t rows = block.rows(); - WeakHash32 hash(rows); - - // get hash values by all partition key columns - for (size_t i = 0; i < partition_col_ids.size(); i++) +inline void initInputBlocks(std::vector & input_blocks) +{ + for (auto & input_block : input_blocks) + { + for (size_t i = 0; i < input_block.columns(); ++i) { - block.getByPosition(partition_col_ids[i]).column->updateWeakHash32(hash, collators[i], partition_key_containers[i]); + if (ColumnPtr converted = input_block.getByPosition(i).column->convertToFullColumnIfConst()) + input_block.getByPosition(i).column = converted; } - const auto & hash_data = hash.getData(); + } +} - // partition each row - IColumn::Selector selector(rows); - for (size_t row = 0; row < rows; ++row) - { - /// Row from interval [(2^32 / partition_num) * i, (2^32 / partition_num) * (i + 1)) goes to bucket with number i. - selector[row] = hash_data[row]; /// [0, 2^32) - selector[row] *= partition_num; /// [0, partition_num * 2^32), selector stores 64 bit values. 
- selector[row] >>= 32u; /// [0, partition_num) - } +inline void initDestColumns(const Block & input_block, std::vector & dest_tbl_cols) +{ + for (auto & cols : dest_tbl_cols) + { + cols = input_block.cloneEmptyColumns(); + } +} - for (size_t col_id = 0; col_id < block.columns(); ++col_id) - { - // Scatter columns to different partitions - auto scattered_columns = block.getByPosition(col_id).column->scatter(partition_num, selector); - for (size_t part_id = 0; part_id < partition_num; ++part_id) - { - dest_tbl_cols[part_id][col_id] = std::move(scattered_columns[part_id]); - } - } - // serialize each partitioned block and write it to its destination - for (auto part_id = 0; part_id < partition_num; ++part_id) - { - dest_blocks[part_id].setColumns(std::move(dest_tbl_cols[part_id])); - responses_row_count[part_id] += dest_blocks[part_id].rows(); - chunk_codec_stream->encode(dest_blocks[part_id], 0, dest_blocks[part_id].rows()); - packet[part_id].add_chunks(chunk_codec_stream->getString()); - chunk_codec_stream->clear(); - } +void computeHash(const Block & input_block, + uint32_t bucket_num, + const TiDB::TiDBCollators & collators, + std::vector & partition_key_containers, + const std::vector & partition_col_ids, + std::vector> & result_columns) +{ + size_t rows = input_block.rows(); + WeakHash32 hash(rows); + + // get hash values by all partition key columns + for (size_t i = 0; i < partition_col_ids.size(); ++i) + { + input_block.getByPosition(partition_col_ids[i]).column->updateWeakHash32(hash, collators[i], partition_key_containers[i]); } - for (auto part_id = 0; part_id < partition_num; ++part_id) + const auto & hash_data = hash.getData(); + + // partition each row + IColumn::Selector selector(rows); + for (size_t row = 0; row < rows; ++row) { - if constexpr (send_exec_summary_at_last) + /// Row from interval [(2^32 / bucket_num) * i, (2^32 / bucket_num) * (i + 1)) goes to bucket with number i. + selector[row] = hash_data[row]; /// [0, 2^32) + selector[row] *= bucket_num; /// [0, bucket_num * 2^32), selector stores 64 bit values. + selector[row] >>= 32u; /// [0, bucket_num) + } + + for (size_t col_id = 0; col_id < input_block.columns(); ++col_id) + { + // Scatter columns to different partitions + std::vector part_columns = input_block.getByPosition(col_id).column->scatter(bucket_num, selector); + assert(part_columns.size() == bucket_num); + for (size_t bucket_idx = 0; bucket_idx < bucket_num; ++bucket_idx) { - writer->write(packet[part_id], part_id); + result_columns[bucket_idx][col_id] = std::move(part_columns[bucket_idx]); } - else + } +} + +/// Hash exchanging data among only TiFlash nodes. Only be called when enable_fine_grained_shuffle is false. 
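Stepping back to `computeHash` above: the selector arithmetic maps a 32-bit hash onto `bucket_num` buckets with a multiply-and-shift instead of a modulo. The same arithmetic in isolation (demo constants only):

```cpp
#include <cassert>
#include <cstdint>

// Map a 32-bit hash in [0, 2^32) to a bucket in [0, bucket_num) via
// multiply-then-shift: (hash * bucket_num) / 2^32, avoiding '%'.
uint64_t bucketOf(uint32_t hash, uint32_t bucket_num)
{
    uint64_t selector = hash; // [0, 2^32)
    selector *= bucket_num;   // [0, bucket_num * 2^32), needs 64 bits
    selector >>= 32u;         // [0, bucket_num)
    return selector;
}

int main()
{
    assert(bucketOf(0, 8) == 0);
    assert(bucketOf(0xFFFFFFFFu, 8) == 7); // top of the range lands in the last bucket
    assert(bucketOf(0x20000000u, 8) == 1); // 2^29 * 8 / 2^32 == 1
}
```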
+template +template +void StreamingDAGResponseWriter::partitionAndEncodeThenWriteBlocks( + std::vector & input_blocks, + tipb::SelectResponse & response) const +{ + static_assert(!enable_fine_grained_shuffle); + std::vector packet(partition_num); + std::vector responses_row_count(partition_num); + handleExecSummary(input_blocks, packet, response); + if (input_blocks.empty()) + return; + + initInputBlocks(input_blocks); + Block dest_block = input_blocks[0].cloneEmpty(); + std::vector partition_key_containers(collators.size()); + for (const auto & block : input_blocks) + { + std::vector dest_tbl_cols(partition_num); + initDestColumns(block, dest_tbl_cols); + + computeHash(block, partition_num, collators, partition_key_containers, partition_col_ids, dest_tbl_cols); + + for (size_t part_id = 0; part_id < partition_num; ++part_id) { - if (responses_row_count[part_id] > 0) - writer->write(packet[part_id], part_id); + dest_block.setColumns(std::move(dest_tbl_cols[part_id])); + responses_row_count[part_id] += dest_block.rows(); + chunk_codec_stream->encode(dest_block, 0, dest_block.rows()); + packet[part_id].add_chunks(chunk_codec_stream->getString()); + chunk_codec_stream->clear(); } } + + writePackets(responses_row_count, packet); } -template +/// Hash exchanging data among only TiFlash nodes. Only be called when enable_fine_grained_shuffle is true. +template template -void StreamingDAGResponseWriter::batchWrite() +void StreamingDAGResponseWriter::batchWriteFineGrainedShuffle() { + static_assert(enable_fine_grained_shuffle); + assert(exchange_type == tipb::ExchangeType::Hash); + assert(fine_grained_shuffle_stream_count <= 1024); + tipb::SelectResponse response; if constexpr (send_exec_summary_at_last) addExecuteSummaries(response, !dag_context.isMPPTask() || dag_context.isRootMPPTask()); - if (exchange_type == tipb::ExchangeType::Hash) - { - partitionAndEncodeThenWriteBlocks(blocks, response); - } - else + + std::vector packet(partition_num); + std::vector responses_row_count(partition_num, 0); + + // fine_grained_shuffle_stream_count is in [0, 1024], and partition_num is uint16_t, so will not overflow. + uint32_t bucket_num = partition_num * fine_grained_shuffle_stream_count; + handleExecSummary(blocks, packet, response); + if (!blocks.empty()) { - encodeThenWriteBlocks(blocks, response); + std::vector final_dest_tbl_columns(bucket_num); + initInputBlocks(blocks); + initDestColumns(blocks[0], final_dest_tbl_columns); + + // Hash partition input_blocks into bucket_num. + for (const auto & block : blocks) + { + std::vector partition_key_containers(collators.size()); + std::vector dest_tbl_columns(bucket_num); + initDestColumns(block, dest_tbl_columns); + computeHash(block, bucket_num, collators, partition_key_containers, partition_col_ids, dest_tbl_columns); + for (size_t bucket_idx = 0; bucket_idx < bucket_num; ++bucket_idx) + { + for (size_t col_id = 0; col_id < block.columns(); ++col_id) + { + const MutableColumnPtr & src_col = dest_tbl_columns[bucket_idx][col_id]; + final_dest_tbl_columns[bucket_idx][col_id]->insertRangeFrom(*src_col, 0, src_col->size()); + } + } + } + + // For i-th stream_count buckets, send to i-th tiflash node. 
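As the comment above says, buckets are laid out partition-major: consecutive groups of `fine_grained_shuffle_stream_count` buckets all go to one downstream node, and the offset within a group selects the receiver stream. The index arithmetic in isolation (illustrative names, not the real API):

```cpp
#include <cassert>
#include <cstdint>

struct BucketTarget
{
    uint64_t part_id;   // which downstream TiFlash node (MPP partition)
    uint64_t stream_id; // which receiver stream on that node
};

// bucket_idx ranges over [0, partition_num * stream_count); the layout is
// partition-major, matching the loop below.
BucketTarget targetOf(uint64_t bucket_idx, uint64_t stream_count)
{
    return {bucket_idx / stream_count, bucket_idx % stream_count};
}

int main()
{
    // partition_num = 4, stream_count = 8, the numbers used by the unit test further down.
    assert(targetOf(0, 8).part_id == 0 && targetOf(0, 8).stream_id == 0);
    assert(targetOf(9, 8).part_id == 1 && targetOf(9, 8).stream_id == 1);
    assert(targetOf(31, 8).part_id == 3 && targetOf(31, 8).stream_id == 7);
}
```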
+ for (size_t bucket_idx = 0; bucket_idx < bucket_num; bucket_idx += fine_grained_shuffle_stream_count) + { + size_t part_id = bucket_idx / fine_grained_shuffle_stream_count; // NOLINT(clang-analyzer-core.DivideZero) + size_t row_count_per_part = 0; + for (uint64_t stream_idx = 0; stream_idx < fine_grained_shuffle_stream_count; ++stream_idx) + { + Block dest_block = blocks[0].cloneEmpty(); + // For now we put all rows into one Block, which may make this Block too large. + dest_block.setColumns(std::move(final_dest_tbl_columns[bucket_idx + stream_idx])); + row_count_per_part += dest_block.rows(); + + chunk_codec_stream->encode(dest_block, 0, dest_block.rows()); + packet[part_id].add_chunks(chunk_codec_stream->getString()); + packet[part_id].add_stream_ids(stream_idx); + chunk_codec_stream->clear(); + } + responses_row_count[part_id] = row_count_per_part; + } + } + + writePackets(responses_row_count, packet); + + blocks.clear(); + rows_in_blocks = 0; } -template class StreamingDAGResponseWriter<MPPTunnelSetPtr>; -template class StreamingDAGResponseWriter<StreamWriterPtr>; +template class StreamingDAGResponseWriter<MPPTunnelSetPtr, /*enable_fine_grained_shuffle=*/true>; +template class StreamingDAGResponseWriter<MPPTunnelSetPtr, /*enable_fine_grained_shuffle=*/false>; +template class StreamingDAGResponseWriter<StreamWriterPtr, /*enable_fine_grained_shuffle=*/true>; +template class StreamingDAGResponseWriter<StreamWriterPtr, /*enable_fine_grained_shuffle=*/false>; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h index 9b5e3864c64..cd7559d1e79 100644 --- a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h +++ b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h @@ -33,7 +33,7 @@ namespace DB /// Serializes the stream of blocks and sends them to TiDB or TiFlash with different serialization paths. /// When sending data to TiDB, blocks with extra info are written into tipb::SelectResponse, then the whole tipb::SelectResponse is further serialized into mpp::MPPDataPacket.data. /// Differently when sending data to TiFlash, blocks with only tuples are directly serialized into mpp::MPPDataPacket.chunks, but for the last block, its extra info (like execution summaries) is written into tipb::SelectResponse, then further serialized into mpp::MPPDataPacket.data. -template <class StreamWriterPtr> +template <class StreamWriterPtr, bool enable_fine_grained_shuffle> class StreamingDAGResponseWriter : public DAGResponseWriter { public: @@ -45,18 +45,30 @@ class StreamingDAGResponseWriter : public DAGResponseWriter Int64 records_per_chunk_, Int64 batch_send_min_limit_, bool should_send_exec_summary_at_last, - DAGContext & dag_context_); + DAGContext & dag_context_, + UInt64 fine_grained_shuffle_stream_count_, + UInt64 fine_grained_shuffle_batch_size); void write(const Block & block) override; void finishWrite() override; private: template <bool send_exec_summary_at_last> void batchWrite(); + template <bool send_exec_summary_at_last> + void batchWriteFineGrainedShuffle(); + template <bool send_exec_summary_at_last> void encodeThenWriteBlocks(const std::vector<Block> & input_blocks, tipb::SelectResponse & response) const; template <bool send_exec_summary_at_last> void partitionAndEncodeThenWriteBlocks(std::vector<Block> & input_blocks, tipb::SelectResponse & response) const; + template <bool send_exec_summary_at_last> + void handleExecSummary(const std::vector<Block> & input_blocks, + std::vector<mpp::MPPDataPacket> & packet, + tipb::SelectResponse & response) const; + template <bool send_exec_summary_at_last> + void writePackets(const std::vector<size_t> & responses_row_count, std::vector<mpp::MPPDataPacket> & packets) const; + Int64 batch_send_min_limit; bool should_send_exec_summary_at_last; /// only one stream needs to send execution summaries at last.
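The header change above turns `enable_fine_grained_shuffle` into a template parameter, so the choice between `batchWrite` and `batchWriteFineGrainedShuffle` is made once at compile time via `if constexpr` rather than on every call. The dispatch pattern reduced to its core:

```cpp
#include <iostream>

// Same shape as StreamingDAGResponseWriter's dispatch: a bool template
// parameter picks the batch routine at compile time, so the hot path
// carries no runtime flag checks.
template <bool enable_fine_grained_shuffle>
void flush()
{
    if constexpr (enable_fine_grained_shuffle)
        std::cout << "batchWriteFineGrainedShuffle\n";
    else
        std::cout << "batchWrite\n";
}

int main()
{
    flush<true>();  // instantiated only with the fine grained path
    flush<false>(); // instantiated only with the classic path
}
```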
tipb::ExchangeType exchange_type; @@ -67,6 +79,8 @@ class StreamingDAGResponseWriter : public DAGResponseWriter size_t rows_in_blocks; uint16_t partition_num; std::unique_ptr<ChunkCodecStream> chunk_codec_stream; + UInt64 fine_grained_shuffle_stream_count; + UInt64 fine_grained_shuffle_batch_size; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp index 7183374a5c1..eef89696d3a 100644 --- a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp +++ b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp @@ -28,7 +28,7 @@ template <typename T> void encodeLittleEndian(const T & value, WriteBuffer & ss) { auto v = toLittleEndian(value); - ss.write(reinterpret_cast<const char *>(&v), sizeof(v)); + ss.template writeFixed<T>(&v); } TiDBColumn::TiDBColumn(Int8 element_len_) @@ -141,10 +141,10 @@ void TiDBColumn::append(const TiDBDecimal & decimal) encodeLittleEndian(decimal.digits_int, *data); encodeLittleEndian(decimal.digits_frac, *data); encodeLittleEndian(decimal.result_frac, *data); - encodeLittleEndian((UInt8)decimal.negative, *data); - for (int i = 0; i < MAX_WORD_BUF_LEN; i++) + encodeLittleEndian(static_cast<UInt8>(decimal.negative), *data); + for (int i : decimal.word_buf) { - encodeLittleEndian(decimal.word_buf[i], *data); + encodeLittleEndian(i, *data); } finishAppendFixed(); } diff --git a/dbms/src/Flash/Coprocessor/tests/gtest_streaming_dag_writer.cpp b/dbms/src/Flash/Coprocessor/tests/gtest_streaming_dag_writer.cpp new file mode 100644 index 00000000000..5d4186123b7 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/tests/gtest_streaming_dag_writer.cpp @@ -0,0 +1,184 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ +namespace tests +{ + +using BlockPtr = std::shared_ptr<Block>; +class TestStreamingDAGResponseWriter : public testing::Test +{ +protected: + void SetUp() override + { + dag_context_ptr = std::make_unique<DAGContext>(1024); + dag_context_ptr->encode_type = tipb::EncodeType::TypeCHBlock; + dag_context_ptr->is_mpp_task = true; + dag_context_ptr->is_root_mpp_task = false; + dag_context_ptr->result_field_types = makeFields(); + context.setDAGContext(dag_context_ptr.get()); + } + +public: + TestStreamingDAGResponseWriter() + : context(TiFlashTestEnv::getContext()) + , part_col_ids{0} + , part_col_collators{ + TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY)} + {} + + // Return 10 Int64 columns. + static std::vector<tipb::FieldType> makeFields() + { + std::vector<tipb::FieldType> fields(10); + for (int i = 0; i < 10; ++i) + { + fields[i].set_tp(TiDB::TypeLongLong); + } + return fields; + } + + // Return a block containing the given **rows** in each of its 10 Int64 columns.
+ static BlockPtr prepareBlock(const std::vector<Int64> & rows) + { + BlockPtr block = std::make_shared<Block>(); + for (int i = 0; i < 10; ++i) + { + DataTypePtr int64_data_type = std::make_shared<DataTypeInt64>(); + DataTypePtr nullable_int64_data_type = std::make_shared<DataTypeNullable>(int64_data_type); + MutableColumnPtr int64_col = nullable_int64_data_type->createColumn(); + for (Int64 r : rows) + { + int64_col->insert(Field(r)); + } + block->insert(ColumnWithTypeAndName{std::move(int64_col), + nullable_int64_data_type, + String("col") + std::to_string(i)}); + } + return block; + } + + Context context; + std::vector<Int64> part_col_ids; + TiDB::TiDBCollators part_col_collators; + + std::unique_ptr<DAGContext> dag_context_ptr; +}; + +using MockStreamWriterChecker = std::function<void(mpp::MPPDataPacket &, uint16_t)>; + +struct MockStreamWriter +{ + MockStreamWriter(MockStreamWriterChecker checker_, + uint16_t part_num_) + : checker(checker_) + , part_num(part_num_) + {} + + void write(mpp::MPPDataPacket &) { FAIL() << "cannot reach here, because we only expect hash partition"; } + void write(mpp::MPPDataPacket & packet, uint16_t part_id) { checker(packet, part_id); } + void write(tipb::SelectResponse &, uint16_t) { FAIL() << "cannot reach here, only consider CH Block format"; } + void write(tipb::SelectResponse &) { FAIL() << "cannot reach here, only consider CH Block format"; } + uint16_t getPartitionNum() const { return part_num; } + +private: + MockStreamWriterChecker checker; + uint16_t part_num; +}; + +// Input block data is distributed uniformly. +// partition_num: 4 +// fine_grained_shuffle_stream_count: 8 +TEST_F(TestStreamingDAGResponseWriter, testBatchWriteFineGrainedShuffle) +try +{ + const size_t block_rows = 1024; + const uint16_t part_num = 4; + const uint32_t fine_grained_shuffle_stream_count = 8; + const Int64 fine_grained_shuffle_batch_size = 4096; + + // Set these to 1, because when fine grained shuffle is enabled, + // batchWriteFineGrainedShuffle() only checks fine_grained_shuffle_batch_size. + // records_per_chunk and batch_send_min_limit are useless. + const Int64 records_per_chunk = 1; + const Int64 batch_send_min_limit = 1; + const bool should_send_exec_summary_at_last = true; + + // 1. Build Block. + std::vector<Int64> uniform_data_set; + for (size_t i = 0; i < block_rows; ++i) + { + uniform_data_set.push_back(i); + } + BlockPtr block = prepareBlock(uniform_data_set); + + // 2. Build MockStreamWriter. + std::unordered_map<uint16_t, mpp::MPPDataPacket> write_report; + auto checker = [&write_report](mpp::MPPDataPacket & packet, uint16_t part_id) { + auto res = write_report.insert({part_id, packet}); + // The insert should always succeed. + // Because block.rows(1024) < fine_grained_shuffle_batch_size(4096), + // batchWriteFineGrainedShuffle() is only called once, so there will only be one packet for each partition. + ASSERT_TRUE(res.second); + }; + auto mock_writer = std::make_shared<MockStreamWriter>(checker, part_num); + + // 3. Start to write. + auto dag_writer = std::make_shared<StreamingDAGResponseWriter<std::shared_ptr<MockStreamWriter>, /*enable_fine_grained_shuffle=*/true>>( + mock_writer, + part_col_ids, + part_col_collators, + tipb::ExchangeType::Hash, + records_per_chunk, + batch_send_min_limit, + should_send_exec_summary_at_last, + *dag_context_ptr, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size); + dag_writer->write(*block); + dag_writer->finishWrite(); + + // 4. Start to check write_report.
+ std::vector<Block> decoded_blocks; + ASSERT_EQ(write_report.size(), part_num); + for (const auto & ele : write_report) + { + const mpp::MPPDataPacket & packet = ele.second; + ASSERT_EQ(packet.chunks_size(), packet.stream_ids_size()); + for (int i = 0; i < packet.chunks_size(); ++i) + { + decoded_blocks.push_back(CHBlockChunkCodec::decode(packet.chunks(i), *block)); + } + } + ASSERT_EQ(decoded_blocks.size(), fine_grained_shuffle_stream_count * part_num); + for (const auto & block : decoded_blocks) + { + ASSERT_EQ(block.rows(), block_rows / (fine_grained_shuffle_stream_count * part_num)); + } +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/EstablishCall.cpp b/dbms/src/Flash/EstablishCall.cpp index 8af81e30962..2f8c7c15f56 100644 --- a/dbms/src/Flash/EstablishCall.cpp +++ b/dbms/src/Flash/EstablishCall.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -19,6 +20,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_tunnel_init_rpc_failure_failpoint[]; +} // namespace FailPoints + EstablishCallData::EstablishCallData(AsyncFlashService * service, grpc::ServerCompletionQueue * cq, grpc::ServerCompletionQueue * notify_cq, const std::shared_ptr<std::atomic<bool>> & is_shutdown) : service(service) , cq(cq) @@ -71,6 +77,7 @@ void EstablishCallData::initRpc() std::exception_ptr eptr = nullptr; try { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_tunnel_init_rpc_failure_failpoint); service->establishMPPConnectionSyncOrAsync(&ctx, &request, nullptr, this); } catch (...) @@ -136,7 +143,7 @@ void EstablishCallData::finishTunnelAndResponder() state = FINISH; if (mpp_tunnel) { - mpp_tunnel->consumerFinish("grpc writes failed.", true); //trigger mpp tunnel finish work + mpp_tunnel->consumerFinish(fmt::format("{}: finishTunnelAndResponder called.", mpp_tunnel->id()), true); //trigger mpp tunnel finish work } grpc::Status status(static_cast<grpc::StatusCode>(GRPC_STATUS_UNKNOWN), "Consumer exits unexpected, grpc writes failed."); responder.Finish(status, this); diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index f194afee31f..ab8d83a1481 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -13,6 +13,8 @@ // limitations under the License. #include +#include +#include #include #include #include @@ -22,6 +24,12 @@ namespace DB { +namespace FailPoints +{ +extern const char random_receiver_sync_msg_push_failure_failpoint[]; +extern const char random_receiver_async_msg_push_failure_failpoint[]; +} // namespace FailPoints + namespace { String getReceiverStateStr(const ExchangeReceiverState & s) @@ -41,6 +49,106 @@ String getReceiverStateStr(const ExchangeReceiverState & s) } } +// If enable_fine_grained_shuffle: +// Separate chunks according to packet.stream_ids[i], then push to msg_channels[stream_id]. +// If fine grained shuffle is disabled: +// Push all chunks to msg_channels[0]. +// Return true if all pushes succeed, otherwise return false. +// NOTE: shared_ptr will be held by all ExchangeReceiverBlockInputStreams to keep the chunk pointers valid.
+template <bool enable_fine_grained_shuffle, bool is_sync> +bool pushPacket(size_t source_index, + const String & req_info, + MPPDataPacketPtr & packet, + const std::vector & msg_channels, + LoggerPtr & log) +{ + bool push_succeed = true; + + const mpp::Error * error_ptr = nullptr; + if (packet->has_error()) + error_ptr = &packet->error(); + const String * resp_ptr = nullptr; + if (!packet->data().empty()) + resp_ptr = &packet->data(); + + if constexpr (enable_fine_grained_shuffle) + { + std::vector<std::vector<const String *>> chunks(msg_channels.size()); + if (!packet->chunks().empty()) + { + // Packet not empty. + if (unlikely(packet->stream_ids().empty())) + { + // Fine grained shuffle is enabled on the receiver, but the sender didn't enable it. We cannot handle this, so return an error. + // This can happen when there are old-version nodes during an upgrade. + LOG_FMT_ERROR(log, "MPPDataPacket.stream_ids empty, it means ExchangeSender is old version of binary " + "(source_index: {}) while fine grained shuffle of ExchangeReceiver is enabled. " + "Cannot handle this.", + source_index); + return false; + } + // packet.stream_ids[i] corresponds to packet.chunks[i], + // indicating which stream_id this chunk belongs to. + assert(packet->chunks_size() == packet->stream_ids_size()); + + for (int i = 0; i < packet->stream_ids_size(); ++i) + { + UInt64 stream_id = packet->stream_ids(i) % msg_channels.size(); + chunks[stream_id].push_back(&packet->chunks(i)); + } + } + // Still need to send error_ptr or resp_ptr even if packet.chunks_size() is zero. + for (size_t i = 0; i < msg_channels.size() && push_succeed; ++i) + { + if (resp_ptr == nullptr && error_ptr == nullptr && chunks[i].empty()) + continue; + + std::shared_ptr<ReceivedMessage> recv_msg = std::make_shared<ReceivedMessage>( + source_index, + req_info, + packet, + error_ptr, + resp_ptr, + std::move(chunks[i])); + push_succeed = msg_channels[i]->push(std::move(recv_msg)); + if constexpr (is_sync) + fiu_do_on(FailPoints::random_receiver_sync_msg_push_failure_failpoint, push_succeed = false;); + else + fiu_do_on(FailPoints::random_receiver_async_msg_push_failure_failpoint, push_succeed = false;); + + // Only the first ExchangeReceiverInputStream needs to handle resp.
+ resp_ptr = nullptr; + } + } + else + { + std::vector chunks(packet->chunks_size()); + for (int i = 0; i < packet->chunks_size(); ++i) + { + chunks[i] = &packet->chunks(i); + } + + if (!(resp_ptr == nullptr && error_ptr == nullptr && chunks.empty())) + { + std::shared_ptr recv_msg = std::make_shared( + source_index, + req_info, + packet, + error_ptr, + resp_ptr, + std::move(chunks)); + + push_succeed = msg_channels[0]->push(std::move(recv_msg)); + if constexpr (is_sync) + fiu_do_on(FailPoints::random_receiver_sync_msg_push_failure_failpoint, push_succeed = false;); + else + fiu_do_on(FailPoints::random_receiver_async_msg_push_failure_failpoint, push_succeed = false;); + } + } + LOG_FMT_DEBUG(log, "push recv_msg to msg_channels(size: {}) succeed:{}, enable_fine_grained_shuffle: {}", msg_channels.size(), push_succeed, enable_fine_grained_shuffle); + return push_succeed; +} + enum class AsyncRequestStage { NEED_INIT, @@ -57,25 +165,25 @@ using TimePoint = Clock::time_point; constexpr Int32 max_retry_times = 10; constexpr Int32 batch_packet_count = 16; -template +template class AsyncRequestHandler : public UnaryCallback { public: using Status = typename RPCContext::Status; using Request = typename RPCContext::Request; using AsyncReader = typename RPCContext::AsyncReader; - using Self = AsyncRequestHandler; + using Self = AsyncRequestHandler; AsyncRequestHandler( MPMCQueue * queue, - MPMCQueue> * msg_channel_, + std::vector * msg_channels_, const std::shared_ptr & context, const Request & req, const String & req_id) : rpc_context(context) , request(&req) , notify_queue(queue) - , msg_channel(msg_channel_) + , msg_channels(msg_channels_) , req_info(fmt::format("tunnel{}+{}", req.send_task_id, req.recv_task_id)) , log(Logger::get("ExchangeReceiver", req_id, req_info)) { @@ -253,11 +361,7 @@ class AsyncRequestHandler : public UnaryCallback for (size_t i = 0; i < read_packet_index; ++i) { auto & packet = packets[i]; - auto recv_msg = std::make_shared(); - recv_msg->packet = std::move(packet); - recv_msg->source_index = request->source_index; - recv_msg->req_info = req_info; - if (!msg_channel->push(std::move(recv_msg))) + if (!pushPacket(request->source_index, req_info, packet, *msg_channels, log)) return false; // can't reuse packet since it is sent to readers. 
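For reference, the routing rule in `pushPacket` above is just a modulo over the channel vector: chunk `i` goes to channel `stream_ids(i) % msg_channels.size()`. A toy version of the grouping, using plain indices instead of protobuf chunks (hypothetical names):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Group chunk indices by target channel, the way pushPacket groups
// packet.chunks(i) using packet.stream_ids(i) % msg_channels.size().
std::vector<std::vector<int>> routeChunks(const std::vector<uint64_t> & stream_ids, size_t channel_count)
{
    std::vector<std::vector<int>> per_channel(channel_count);
    for (size_t i = 0; i < stream_ids.size(); ++i)
        per_channel[stream_ids[i] % channel_count].push_back(static_cast<int>(i));
    return per_channel;
}

int main()
{
    auto routed = routeChunks({0, 1, 2, 9}, 8); // stream id 9 wraps to channel 1
    assert(routed[0].size() == 1 && routed[1].size() == 2 && routed[2].size() == 1);
}
```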
packet = std::make_shared(); @@ -274,7 +378,7 @@ class AsyncRequestHandler : public UnaryCallback std::shared_ptr rpc_context; const Request * request; // won't be null MPMCQueue * notify_queue; // won't be null - MPMCQueue> * msg_channel; // won't be null + std::vector * msg_channels; // won't be null String req_info; bool meet_error = false; @@ -299,20 +403,32 @@ ExchangeReceiverBase::ExchangeReceiverBase( size_t source_num_, size_t max_streams_, const String & req_id, - const String & executor_id) + const String & executor_id, + uint64_t fine_grained_shuffle_stream_count_) : rpc_context(std::move(rpc_context_)) , source_num(source_num_) , max_streams(max_streams_) , max_buffer_size(std::max(batch_packet_count, std::max(source_num, max_streams_) * 2)) , thread_manager(newThreadManager()) - , msg_channel(max_buffer_size) , live_connections(source_num) , state(ExchangeReceiverState::NORMAL) , exc_log(Logger::get("ExchangeReceiver", req_id, executor_id)) , collected(false) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) { try { + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count_)) + { + for (size_t i = 0; i < max_streams_; ++i) + { + msg_channels.push_back(std::make_unique>>(max_buffer_size)); + } + } + else + { + msg_channels.push_back(std::make_unique>>(max_buffer_size)); + } rpc_context->fillSchema(schema); setUpConnection(); } @@ -349,14 +465,14 @@ template void ExchangeReceiverBase::cancel() { setEndState(ExchangeReceiverState::CANCELED); - msg_channel.finish(); + cancelAllMsgChannels(); } template void ExchangeReceiverBase::close() { setEndState(ExchangeReceiverState::CLOSED); - msg_channel.finish(); + finishAllMsgChannels(); } template @@ -371,7 +487,12 @@ void ExchangeReceiverBase::setUpConnection() async_requests.push_back(std::move(req)); else { - thread_manager->schedule(true, "Receiver", [this, req = std::move(req)] { readLoop(req); }); + thread_manager->schedule(true, "Receiver", [this, req = std::move(req)] { + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count)) + readLoop(req); + else + readLoop(req); + }); ++thread_count; } } @@ -379,15 +500,21 @@ void ExchangeReceiverBase::setUpConnection() // TODO: reduce this thread in the future. 
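The constructor above sizes the channel vector to match: one MPMC channel per local read stream when fine grained shuffle is on, otherwise a single shared channel. Schematically (assuming `enableFineGrainedShuffle(n)` simply means `n > 0`, which this hunk does not show):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

// Mirrors the sizing rule in ExchangeReceiverBase's constructor;
// the n > 0 test stands in for enableFineGrainedShuffle() (an assumption).
size_t channelCount(uint64_t fine_grained_shuffle_stream_count, size_t max_streams)
{
    return fine_grained_shuffle_stream_count > 0 ? max_streams : 1;
}

int main()
{
    assert(channelCount(8, 16) == 16); // one channel per local read stream
    assert(channelCount(0, 16) == 1);  // single shared channel, as before the patch
}
```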
     if (!async_requests.empty())
     {
-        thread_manager->schedule(true, "RecvReactor", [this, async_requests = std::move(async_requests)] { reactor(async_requests); });
+        thread_manager->schedule(true, "RecvReactor", [this, async_requests = std::move(async_requests)] {
+            if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count))
+                reactor<true>(async_requests);
+            else
+                reactor<false>(async_requests);
+        });
         ++thread_count;
     }
 }
 
 template <typename RPCContext>
+template <bool enable_fine_grained_shuffle>
 void ExchangeReceiverBase<RPCContext>::reactor(const std::vector<Request> & async_requests)
 {
-    using AsyncHandler = AsyncRequestHandler<RPCContext>;
+    using AsyncHandler = AsyncRequestHandler<RPCContext, enable_fine_grained_shuffle>;
 
     GET_METRIC(tiflash_thread_count, type_threads_of_receiver_reactor).Increment();
     SCOPE_EXIT({
@@ -403,7 +530,7 @@ void ExchangeReceiverBase<RPCContext>::reactor(const std::vector<Request> & asyn
     std::vector<std::unique_ptr<AsyncHandler>> handlers;
     handlers.reserve(alive_async_connections);
     for (const auto & req : async_requests)
-        handlers.emplace_back(std::make_unique<AsyncHandler>(&ready_requests, &msg_channel, rpc_context, req, exc_log->identifier()));
+        handlers.emplace_back(std::make_unique<AsyncHandler>(&ready_requests, &msg_channels, rpc_context, req, exc_log->identifier()));
 
     while (alive_async_connections > 0)
     {
@@ -415,7 +542,7 @@ void ExchangeReceiverBase<RPCContext>::reactor(const std::vector<Request> & asyn
         for (Int32 i = 0; i < check_waiting_requests_freq; ++i)
         {
             AsyncHandler * handler = nullptr;
-            if (unlikely(!ready_requests.tryPop(handler, timeout)))
+            if (unlikely(!ready_requests.popTimeout(handler, timeout)))
                 break;
 
             handler->handle();
@@ -448,6 +575,7 @@ void ExchangeReceiverBase<RPCContext>::reactor(const std::vector<Request> & asyn
 }
 
 template <typename RPCContext>
+template <bool enable_fine_grained_shuffle>
 void ExchangeReceiverBase<RPCContext>::readLoop(const Request & req)
 {
     GET_METRIC(tiflash_thread_count, type_threads_of_receiver_read_loop).Increment();
@@ -472,18 +600,15 @@ void ExchangeReceiverBase<RPCContext>::readLoop(const Request & req)
     for (;;)
     {
         LOG_FMT_TRACE(log, "begin next ");
-        auto recv_msg = std::make_shared<ReceivedMessage>();
-        recv_msg->packet = std::make_shared<MPPDataPacket>();
-        recv_msg->req_info = req_info;
-        recv_msg->source_index = req.source_index;
-        bool success = reader->read(recv_msg->packet);
+        MPPDataPacketPtr packet = std::make_shared<MPPDataPacket>();
+        bool success = reader->read(packet);
         if (!success)
             break;
         has_data = true;
-        if (recv_msg->packet->has_error())
-            throw Exception("Exchange receiver meet error : " + recv_msg->packet->error().msg());
+        if (packet->has_error())
+            throw Exception("Exchange receiver meet error : " + packet->error().msg());
 
-        if (!msg_channel.push(std::move(recv_msg)))
+        if (!pushPacket(req.source_index, req_info, packet, msg_channels, log))
         {
             meet_error = true;
             auto local_state = getState();
@@ -553,15 +678,15 @@ DecodeDetail ExchangeReceiverBase<RPCContext>::decodeChunks(
     assert(recv_msg != nullptr);
     DecodeDetail detail;
 
-    int chunk_size = recv_msg->packet->chunks_size();
-    if (chunk_size == 0)
+    if (recv_msg->chunks.empty())
         return detail;
 
+    // Record total packet size even if fine grained shuffle is enabled.
     detail.packet_bytes = recv_msg->packet->ByteSizeLong();
-    /// ExchangeReceiverBase should receive chunks of TypeCHBlock
-    for (int i = 0; i < chunk_size; ++i)
+
+    for (const String * chunk : recv_msg->chunks)
     {
-        Block block = CHBlockChunkCodec::decode(recv_msg->packet->chunks(i), header);
+        Block block = CHBlockChunkCodec::decode(*chunk, header);
         detail.rows += block.rows();
         if (unlikely(block.rows() == 0))
             continue;
@@ -571,10 +696,15 @@ DecodeDetail ExchangeReceiverBase<RPCContext>::decodeChunks(
 }
 
 template <typename RPCContext>
-ExchangeReceiverResult ExchangeReceiverBase<RPCContext>::nextResult(std::queue<Block> & block_queue, const Block & header)
+ExchangeReceiverResult ExchangeReceiverBase<RPCContext>::nextResult(std::queue<Block> & block_queue, const Block & header, size_t stream_id)
 {
+    if (unlikely(stream_id >= msg_channels.size()))
+    {
+        LOG_FMT_ERROR(exc_log, "stream_id out of range, stream_id: {}, total_stream_count: {}", stream_id, msg_channels.size());
+        return {nullptr, 0, "", true, "stream_id out of range", false};
+    }
     std::shared_ptr<ReceivedMessage> recv_msg;
-    if (!msg_channel.pop(recv_msg))
+    if (!msg_channels[stream_id]->pop(recv_msg))
     {
         std::unique_lock lock(mu);
 
@@ -596,29 +726,32 @@ ExchangeReceiverResult ExchangeReceiverBase<RPCContext>::nextResult(std::queue<
-    assert(recv_msg != nullptr && recv_msg->packet != nullptr);
+    assert(recv_msg != nullptr);
     ExchangeReceiverResult result;
-    if (recv_msg->packet->has_error())
+    if (recv_msg->error_ptr != nullptr)
     {
-        result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, recv_msg->packet->error().msg(), false};
+        result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, recv_msg->error_ptr->msg(), false};
     }
     else
     {
-        if (!recv_msg->packet->data().empty()) /// the data of the last packet is serialized from tipb::SelectResponse including execution summaries.
+        if (recv_msg->resp_ptr != nullptr) /// the data of the last packet is serialized from tipb::SelectResponse including execution summaries.
         {
-            auto resp_ptr = std::make_shared<tipb::SelectResponse>();
-            if (!resp_ptr->ParseFromString(recv_msg->packet->data()))
+            auto select_resp = std::make_shared<tipb::SelectResponse>();
+            if (!select_resp->ParseFromString(*(recv_msg->resp_ptr)))
             {
                 result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, "decode error", false};
             }
             else
             {
-                result = {resp_ptr, recv_msg->source_index, recv_msg->req_info, false, "", false};
-                /// If mocking TiFlash as TiDB, here should decode chunks from resp_ptr.
-                if (!resp_ptr->chunks().empty())
+                result = {select_resp, recv_msg->source_index, recv_msg->req_info, false, "", false};
+                /// If mocking TiFlash as TiDB, here should decode chunks from select_resp.
+                if (!select_resp->chunks().empty())
                 {
-                    assert(recv_msg->packet->chunks().empty());
-                    result.decode_detail = CoprocessorReader::decodeChunks(resp_ptr, block_queue, header, schema);
+                    assert(recv_msg->chunks.empty());
+                    // Fine grained shuffle should only be enabled when sending data to TiFlash node.
+                    // So all data should be encoded into MPPDataPacket.chunks.
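+                    // (Sketch of the assumed packet layout: rows bound for TiFlash
+                    // travel in MPPDataPacket.chunks, i.e. packet.chunks(i) holds
+                    // CHBlock-encoded data, while tipb::SelectResponse.chunks is only
+                    // used on the TiDB-bound path, which never enables fine grained
+                    // shuffle -- hence the RUNTIME_CHECK below.)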
+                    RUNTIME_CHECK(!enableFineGrainedShuffle(fine_grained_shuffle_stream_count), Exception, "Data should not be encoded into tipb::SelectResponse.chunks when fine grained shuffle is enabled");
+                    result.decode_detail = CoprocessorReader::decodeChunks(select_resp, block_queue, header, schema);
                 }
             }
         }
@@ -626,7 +759,7 @@ ExchangeReceiverResult ExchangeReceiverBase<RPCContext>::nextResult(std::queue<
             result = {nullptr, recv_msg->source_index, recv_msg->req_info, false, "", false};
         }
-        if (!result.meet_error && !recv_msg->packet->chunks().empty())
+        if (!result.meet_error && !recv_msg->chunks.empty())
         {
             assert(result.decode_detail.rows == 0);
             result.decode_detail = decodeChunks(recv_msg, block_queue, header);
@@ -688,7 +821,21 @@ void ExchangeReceiverBase<RPCContext>::connectionDone(
         throw Exception("live_connections should not be less than 0!");
 
     if (meet_error || copy_live_conn == 0)
-        msg_channel.finish();
+        finishAllMsgChannels();
+}
+
+template <typename RPCContext>
+void ExchangeReceiverBase<RPCContext>::finishAllMsgChannels()
+{
+    for (auto & msg_channel : msg_channels)
+        msg_channel->finish();
+}
+
+template <typename RPCContext>
+void ExchangeReceiverBase<RPCContext>::cancelAllMsgChannels()
+{
+    for (auto & msg_channel : msg_channels)
+        msg_channel->cancel();
 }
 
 /// Explicit template instantiations - to avoid code bloat in headers.
diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.h b/dbms/src/Flash/Mpp/ExchangeReceiver.h
index 830dc6241a9..708f133f226 100644
--- a/dbms/src/Flash/Mpp/ExchangeReceiver.h
+++ b/dbms/src/Flash/Mpp/ExchangeReceiver.h
@@ -35,9 +35,28 @@ namespace DB
 {
 struct ReceivedMessage
 {
-    std::shared_ptr<mpp::MPPDataPacket> packet;
-    size_t source_index = 0;
+    size_t source_index;
     String req_info;
+    // The shared_ptr is copied to make sure error_ptr, resp_ptr and chunks remain valid.
+    const std::shared_ptr<mpp::MPPDataPacket> packet;
+    const mpp::Error * error_ptr;
+    const String * resp_ptr;
+    std::vector<const String *> chunks;
+
+    // Constructor that moves the chunks.
+    ReceivedMessage(size_t source_index_,
+                    const String & req_info_,
+                    const std::shared_ptr<mpp::MPPDataPacket> & packet_,
+                    const mpp::Error * error_ptr_,
+                    const String * resp_ptr_,
+                    std::vector<const String *> && chunks_)
+        : source_index(source_index_)
+        , req_info(req_info_)
+        , packet(packet_)
+        , error_ptr(error_ptr_)
+        , resp_ptr(resp_ptr_)
+        , chunks(chunks_)
+    {}
 };
 
 struct ExchangeReceiverResult
@@ -78,6 +97,7 @@ enum class ExchangeReceiverState
     CLOSED,
 };
 
+using MsgChannelPtr = std::unique_ptr<MPMCQueue<std::shared_ptr<ReceivedMessage>>>;
 
 template <typename RPCContext>
 class ExchangeReceiverBase
 {
@@ -92,7 +112,8 @@ class ExchangeReceiverBase
         size_t source_num_,
         size_t max_streams_,
         const String & req_id,
-        const String & executor_id);
+        const String & executor_id,
+        uint64_t fine_grained_shuffle_stream_count);
 
     ~ExchangeReceiverBase();
 
@@ -104,9 +125,11 @@ class ExchangeReceiverBase
     ExchangeReceiverResult nextResult(
         std::queue<Block> & block_queue,
-        const Block & header);
+        const Block & header,
+        size_t stream_id);
 
     size_t getSourceNum() const { return source_num; }
+    uint64_t getFineGrainedShuffleStreamCount() const { return fine_grained_shuffle_stream_count; }
 
     int computeNewThreadCount() const { return thread_count; }
 
@@ -128,7 +151,10 @@ class ExchangeReceiverBase
     using Request = typename RPCContext::Request;
 
     void setUpConnection();
+    // Template argument enable_fine_grained_shuffle will be set up properly in setUpConnection().
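+    // Channel layout (a sketch of the invariant, not additional API):
+    //   msg_channels.size() == enableFineGrainedShuffle(stream_count) ? max_streams : 1
+    // so nextResult(block_queue, header, stream_id) pops from its own queue when
+    // fine grained shuffle is enabled, and every caller uses stream_id 0 otherwise.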
+    template <bool enable_fine_grained_shuffle>
     void readLoop(const Request & req);
+    template <bool enable_fine_grained_shuffle>
     void reactor(const std::vector<Request> & async_requests);
 
     bool setEndState(ExchangeReceiverState new_state);
@@ -139,12 +165,14 @@ class ExchangeReceiverBase
         std::queue<Block> & block_queue,
         const Block & header);
 
-
     void connectionDone(
         bool meet_error,
         const String & local_err_msg,
         const LoggerPtr & log);
 
+    void finishAllMsgChannels();
+    void cancelAllMsgChannels();
+
     std::shared_ptr<RPCContext> rpc_context;
 
     const tipb::ExchangeReceiver pb_exchange_receiver;
@@ -156,7 +184,7 @@ class ExchangeReceiverBase
     std::shared_ptr<ThreadManager> thread_manager;
     DAGSchema schema;
 
-    MPMCQueue<std::shared_ptr<ReceivedMessage>> msg_channel;
+    std::vector<MsgChannelPtr> msg_channels;
 
     std::mutex mu;
     /// should lock `mu` when visit these members
@@ -168,6 +196,7 @@ class ExchangeReceiverBase
     bool collected = false;
     int thread_count = 0;
+    uint64_t fine_grained_shuffle_stream_count;
 };
 
 class ExchangeReceiver : public ExchangeReceiverBase<GRPCReceiverContext>
diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp
index a3096aaa644..7f97a1dd698 100644
--- a/dbms/src/Flash/Mpp/MPPHandler.cpp
+++ b/dbms/src/Flash/Mpp/MPPHandler.cpp
@@ -31,7 +31,7 @@ void MPPHandler::handleError(const MPPTaskPtr & task, String error)
     try
     {
         if (task)
-            task->cancel(error);
+            task->handleError(error);
     }
     catch (...)
     {
diff --git a/dbms/src/Flash/Mpp/MPPReceiverSet.cpp b/dbms/src/Flash/Mpp/MPPReceiverSet.cpp
new file mode 100644
index 00000000000..60cca308c18
--- /dev/null
+++ b/dbms/src/Flash/Mpp/MPPReceiverSet.cpp
@@ -0,0 +1,48 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+
+namespace DB
+{
+void MPPReceiverSet::addExchangeReceiver(const String & executor_id, const ExchangeReceiverPtr & exchange_receiver)
+{
+    RUNTIME_ASSERT(exchange_receiver_map.find(executor_id) == exchange_receiver_map.end(), log, "Duplicate executor_id: {} in DAGRequest", executor_id);
+    exchange_receiver_map[executor_id] = exchange_receiver;
+}
+
+void MPPReceiverSet::addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader)
+{
+    coprocessor_readers.push_back(coprocessor_reader);
+}
+
+ExchangeReceiverPtr MPPReceiverSet::getExchangeReceiver(const String & executor_id) const
+{
+    auto it = exchange_receiver_map.find(executor_id);
+    if (unlikely(it == exchange_receiver_map.end()))
+        return nullptr;
+    return it->second;
+}
+
+void MPPReceiverSet::cancel()
+{
+    for (auto & it : exchange_receiver_map)
+    {
+        it.second->cancel();
+    }
+    for (auto & cop_reader : coprocessor_readers)
+        cop_reader->cancel();
+}
+} // namespace DB
diff --git a/dbms/src/Flash/Mpp/MPPReceiverSet.h b/dbms/src/Flash/Mpp/MPPReceiverSet.h
new file mode 100644
index 00000000000..44274cb3ce8
--- /dev/null
+++ b/dbms/src/Flash/Mpp/MPPReceiverSet.h
@@ -0,0 +1,44 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+class MPPReceiverSet
+{
+public:
+    explicit MPPReceiverSet(const String & req_id)
+        : log(Logger::get("MPPReceiverSet", req_id))
+    {}
+    void addExchangeReceiver(const String & executor_id, const ExchangeReceiverPtr & exchange_receiver);
+    void addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader);
+    ExchangeReceiverPtr getExchangeReceiver(const String & executor_id) const;
+    void cancel();
+
+private:
+    /// two kinds of receivers in MPP
+    /// ExchangeReceiver: receives data from other MPPTasks
+    /// CoprocessorReader: used in remote reads
+    ExchangeReceiverMap exchange_receiver_map;
+    std::vector<CoprocessorReaderPtr> coprocessor_readers;
+    const LoggerPtr log;
+};
+
+using MPPReceiverSetPtr = std::shared_ptr<MPPReceiverSet>;
+
+} // namespace DB
diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp
index 40f03ff79ba..7ddc6af361f 100644
--- a/dbms/src/Flash/Mpp/MPPTask.cpp
+++ b/dbms/src/Flash/Mpp/MPPTask.cpp
@@ -51,6 +51,7 @@ extern const char exception_before_mpp_register_tunnel_for_root_mpp_task[];
 extern const char exception_during_mpp_register_tunnel_for_non_root_mpp_task[];
 extern const char exception_during_mpp_write_err_to_tunnel[];
 extern const char force_no_local_region_for_mpp_task[];
+extern const char random_task_lifecycle_failpoint[];
 } // namespace FailPoints
 
 MPPTask::MPPTask(const mpp::TaskMeta & meta_, const ContextPtr & context_)
@@ -80,6 +81,34 @@ MPPTask::~MPPTask()
     LOG_FMT_DEBUG(log, "finish MPPTask: {}", id.toString());
 }
 
+void MPPTask::abortTunnels(const String & message, AbortType abort_type)
+{
+    if (abort_type == AbortType::ONCANCELLATION)
+    {
+        closeAllTunnels(message);
+    }
+    else
+    {
+        RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set");
+        tunnel_set->writeError(message);
+    }
+}
+
+void MPPTask::abortReceivers()
+{
+    if (likely(receiver_set != nullptr))
+    {
+        receiver_set->cancel();
+    }
+}
+
+void MPPTask::abortDataStreams(AbortType abort_type)
+{
+    /// When the abort type is ONERROR, the MPPTask already knows it met an error,
+    /// so let the remaining work stop silently to avoid too many useless error messages
+    bool is_kill = abort_type == AbortType::ONCANCELLATION;
+    context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, is_kill);
+}
+
 void MPPTask::closeAllTunnels(const String & reason)
 {
     if (likely(tunnel_set))
@@ -125,7 +154,7 @@ void MPPTask::registerTunnels(const mpp::DispatchTaskRequest & task_request)
 
 void MPPTask::initExchangeReceivers()
 {
-    mpp_exchange_receiver_map = std::make_shared<ExchangeReceiverMap>();
+    receiver_set = std::make_shared<MPPReceiverSet>(log->identifier());
     traverseExecutors(&dag_req, [&](const tipb::Executor & executor) {
         if (executor.tp() == tipb::ExecType::TypeExchangeReceiver)
         {
@@ -143,27 +172,17 @@ void MPPTask::initExchangeReceivers()
                 executor.exchange_receiver().encoded_task_meta_size(),
                 context->getMaxStreams(),
                 log->identifier(),
-                executor_id);
+                executor_id,
+                executor.fine_grained_shuffle_stream_count());
             if (status != RUNNING)
                 throw Exception("exchange receiver map can not be initialized, because the task is not in running state");
not in running state"); - (*mpp_exchange_receiver_map)[executor_id] = exchange_receiver; + receiver_set->addExchangeReceiver(executor_id, exchange_receiver); new_thread_count_of_exchange_receiver += exchange_receiver->computeNewThreadCount(); } return true; }); - dag_context->setMPPExchangeReceiverMap(mpp_exchange_receiver_map); -} - -void MPPTask::cancelAllExchangeReceivers() -{ - if (likely(mpp_exchange_receiver_map != nullptr)) - { - for (auto & it : *mpp_exchange_receiver_map) - { - it.second->cancel(); - } - } + dag_context->setMPPReceiverSet(receiver_set); } std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConnectionRequest * request) @@ -359,92 +378,124 @@ void MPPTask::runImpl() return_statistics.blocks, return_statistics.bytes); } - catch (Exception & e) - { - err_msg = e.displayText(); - LOG_FMT_ERROR(log, "task running meets error: {} Stack Trace : {}", err_msg, e.getStackTrace().toString()); - } - catch (pingcap::Exception & e) - { - err_msg = e.message(); - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); - } - catch (std::exception & e) - { - err_msg = e.what(); - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); - } catch (...) { - err_msg = "unrecovered error"; - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); + err_msg = getCurrentExceptionMessage(true, true); } + if (err_msg.empty()) { - // todo when error happens, should try to update the metrics if it is available - auto throughput = dag_context->getTableScanThroughput(); - if (throughput.first) - GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); - auto process_info = context->getProcessListElement()->getInfo(); - auto peak_memory = process_info.peak_memory_usage > 0 ? process_info.peak_memory_usage : 0; - GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_task).Observe(peak_memory); - mpp_task_statistics.setMemoryPeak(peak_memory); + if (switchStatus(RUNNING, FINISHED)) + LOG_INFO(log, "finish task"); + else + LOG_FMT_WARNING(log, "finish task which is in {} state", taskStatusToString(status)); + if (status == FINISHED) + { + // todo when error happens, should try to update the metrics if it is available + auto throughput = dag_context->getTableScanThroughput(); + if (throughput.first) + GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); + auto process_info = context->getProcessListElement()->getInfo(); + auto peak_memory = process_info.peak_memory_usage > 0 ? process_info.peak_memory_usage : 0; + GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_task).Observe(peak_memory); + mpp_task_statistics.setMemoryPeak(peak_memory); + } } else { - context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, true); - cancelAllExchangeReceivers(); - writeErrToAllTunnels(err_msg); + if (status == RUNNING) + { + LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); + /// trim the stack trace to avoid too many useless information in log + trimStackTrace(err_msg); + try + { + handleError(err_msg); + } + catch (...) 
+            {
+                tryLogCurrentException(log, "Meet error while trying to handle error in MPPTask");
+            }
+        }
     }
     LOG_FMT_INFO(log, "task ends, time cost is {} ms.", stopwatch.elapsedMilliseconds());
-    unregisterTask();
-
-    if (switchStatus(RUNNING, FINISHED))
-        LOG_INFO(log, "finish task");
-    else
-        LOG_WARNING(log, "finish task which was cancelled before");
+    // The unregister flag is only for FailPoint usage, to produce the situation that the MPPTask is
+    // destructed by the grpc CancelMPPTask thread.
+    bool unregister = true;
+    fiu_do_on(FailPoints::random_task_lifecycle_failpoint, {
+        if (!err_msg.empty())
+            unregister = false;
+    });
+    if (unregister)
+        unregisterTask();
-
     mpp_task_statistics.end(status.load(), err_string);
     mpp_task_statistics.logTracingJson();
 }
 
-void MPPTask::writeErrToAllTunnels(const String & e)
+void MPPTask::handleError(const String & error_msg)
 {
-    RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set");
-    tunnel_set->writeError(e);
+    if (manager == nullptr || !manager->isTaskToBeCancelled(id))
+        abort(error_msg, AbortType::ONERROR);
 }
 
-void MPPTask::cancel(const String & reason)
+void MPPTask::abort(const String & message, AbortType abort_type)
 {
-    CPUAffinityManager::getInstance().bindSelfQueryThread();
-    LOG_FMT_WARNING(log, "Begin cancel task: {}", id.toString());
+    String abort_type_string;
+    TaskStatus next_task_status;
+    switch (abort_type)
+    {
+    case AbortType::ONCANCELLATION:
+        abort_type_string = "ONCANCELLATION";
+        next_task_status = CANCELLED;
+        break;
+    case AbortType::ONERROR:
+        abort_type_string = "ONERROR";
+        next_task_status = FAILED;
+        break;
+    }
+    LOG_FMT_WARNING(log, "Begin abort task: {}, abort type: {}", id.toString(), abort_type_string);
     while (true)
     {
         auto previous_status = status.load();
-        if (previous_status == FINISHED || previous_status == CANCELLED)
+        if (previous_status == FINISHED || previous_status == CANCELLED || previous_status == FAILED)
         {
-            LOG_FMT_WARNING(log, "task already {}", (previous_status == FINISHED ? "finished" : "cancelled"));
"finished" : "cancelled")); + LOG_FMT_WARNING(log, "task already in {} state", taskStatusToString(previous_status)); return; } - else if (previous_status == INITIALIZING && switchStatus(INITIALIZING, CANCELLED)) + else if (previous_status == INITIALIZING && switchStatus(INITIALIZING, next_task_status)) { - closeAllTunnels(reason); + err_string = message; + /// if the task is in initializing state, mpp task can return error to TiDB directly, + /// so just close all tunnels here + closeAllTunnels(message); unregisterTask(); - LOG_WARNING(log, "Finish cancel task from uninitialized"); + LOG_WARNING(log, "Finish abort task from uninitialized"); return; } - else if (previous_status == RUNNING && switchStatus(RUNNING, CANCELLED)) + else if (previous_status == RUNNING && switchStatus(RUNNING, next_task_status)) { + /// abort the components from top to bottom because if bottom components are aborted + /// first, the top components may see an error caused by the abort, which is not + /// the original error + err_string = message; + abortTunnels(message, abort_type); + abortDataStreams(abort_type); + abortReceivers(); scheduleThisTask(ScheduleState::FAILED); - context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, true); - closeAllTunnels(reason); /// runImpl is running, leave remaining work to runImpl - LOG_WARNING(log, "Finish cancel task from running"); + LOG_WARNING(log, "Finish abort task from running"); return; } } } +void MPPTask::cancel(const String & reason) +{ + CPUAffinityManager::getInstance().bindSelfQueryThread(); + abort(reason, AbortType::ONCANCELLATION); +} + bool MPPTask::switchStatus(TaskStatus from, TaskStatus to) { return status.compare_exchange_strong(from, to); diff --git a/dbms/src/Flash/Mpp/MPPTask.h b/dbms/src/Flash/Mpp/MPPTask.h index c8423ac484c..a30150b26e8 100644 --- a/dbms/src/Flash/Mpp/MPPTask.h +++ b/dbms/src/Flash/Mpp/MPPTask.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,8 @@ class MPPTask : public std::enable_shared_from_this void cancel(const String & reason); + void handleError(const String & error_msg); + void prepare(const mpp::DispatchTaskRequest & task_request); void run(); @@ -89,12 +92,22 @@ class MPPTask : public std::enable_shared_from_this void unregisterTask(); - void writeErrToAllTunnels(const String & e); - /// Similar to `writeErrToAllTunnels`, but it just try to write the error message to tunnel /// without waiting the tunnel to be connected void closeAllTunnels(const String & reason); + enum class AbortType + { + /// todo add ONKILL to distinguish between silent cancellation and kill + ONCANCELLATION, + ONERROR, + }; + void abort(const String & message, AbortType abort_type); + + void abortTunnels(const String & message, AbortType abort_type); + void abortReceivers(); + void abortDataStreams(AbortType abort_type); + void finishWrite(); bool switchStatus(TaskStatus from, TaskStatus to); @@ -109,8 +122,6 @@ class MPPTask : public std::enable_shared_from_this void initExchangeReceivers(); - void cancelAllExchangeReceivers(); - tipb::DAGRequest dag_req; ContextPtr context; @@ -120,14 +131,15 @@ class MPPTask : public std::enable_shared_from_this MemoryTracker * memory_tracker = nullptr; std::atomic status{INITIALIZING}; + String err_string; mpp::TaskMeta meta; MPPTaskId id; MPPTunnelSetPtr tunnel_set; - /// key: executor_id of ExchangeReceiver nodes in dag. 
- ExchangeReceiverMapPtr mpp_exchange_receiver_map; + + MPPReceiverSetPtr receiver_set; int new_thread_count_of_exchange_receiver = 0; @@ -137,8 +149,6 @@ class MPPTask : public std::enable_shared_from_this MPPTaskStatistics mpp_task_statistics; - Exception err; - friend class MPPTaskManager; int needed_threads; diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.cpp b/dbms/src/Flash/Mpp/MPPTaskManager.cpp index 531f8f7a10d..c5499eda89d 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.cpp +++ b/dbms/src/Flash/Mpp/MPPTaskManager.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -22,6 +23,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_task_manager_find_task_failure_failpoint[]; +} // namespace FailPoints + MPPTaskManager::MPPTaskManager(MPPTaskSchedulerPtr scheduler_) : scheduler(std::move(scheduler_)) , log(&Poco::Logger::get("TaskManager")) @@ -50,6 +56,7 @@ MPPTaskPtr MPPTaskManager::findTaskWithTimeout(const mpp::TaskMeta & meta, std:: it = query_it->second->task_map.find(id); return it != query_it->second->task_map.end(); }); + fiu_do_on(FailPoints::random_task_manager_find_task_failure_failpoint, ret = false;); if (cancelled) { errMsg = fmt::format("Task [{},{}] has been cancelled.", meta.start_ts(), meta.task_id()); @@ -140,6 +147,17 @@ bool MPPTaskManager::registerTask(MPPTaskPtr task) return true; } +bool MPPTaskManager::isTaskToBeCancelled(const MPPTaskId & task_id) +{ + std::unique_lock lock(mu); + auto it = mpp_query_map.find(task_id.start_ts); + if (it != mpp_query_map.end() && it->second->to_be_cancelled) + { + return it->second->task_map.find(task_id) != it->second->task_map.end(); + } + return false; +} + void MPPTaskManager::unregisterTask(MPPTask * task) { std::unique_lock lock(mu); diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.h b/dbms/src/Flash/Mpp/MPPTaskManager.h index d7047804aca..770acea3853 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.h +++ b/dbms/src/Flash/Mpp/MPPTaskManager.h @@ -73,6 +73,8 @@ class MPPTaskManager : private boost::noncopyable void unregisterTask(MPPTask * task); + bool isTaskToBeCancelled(const MPPTaskId & task_id); + bool tryToScheduleTask(const MPPTaskPtr & task); void releaseThreadsFromScheduler(const int needed_threads); diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp index 826e7fea88a..16fe4ae42cc 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp @@ -25,6 +25,7 @@ namespace DB namespace FailPoints { extern const char exception_during_mpp_close_tunnel[]; +extern const char random_tunnel_wait_timeout_failpoint[]; } // namespace FailPoints template @@ -219,7 +220,11 @@ void MPPTunnelBase::sendJob(bool need_lock) err_msg = "fatal error in sendJob()"; } if (!err_msg.empty()) + { + /// append tunnel id to error message + err_msg = fmt::format("{} meet error: {}", tunnel_id, err_msg); LOG_ERROR(log, err_msg); + } consumerFinish(err_msg, need_lock); if (is_async) writer->writeDone(grpc::Status::OK); @@ -322,6 +327,7 @@ void MPPTunnelBase::waitUntilConnectedOrFinished(std::unique_lock #include #include #include namespace DB { +namespace FailPoints +{ +extern const char random_min_tso_scheduler_failpoint[]; +} // namespace FailPoints + constexpr UInt64 MAX_UINT64 = std::numeric_limits::max(); constexpr UInt64 OS_THREAD_SOFT_LIMIT = 100000; @@ -193,7 +199,9 @@ bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & q } else { - 
if (tso <= min_tso) /// the min_tso query should fully run, otherwise throw errors here. + bool is_tso_min = tso <= min_tso; + fiu_do_on(FailPoints::random_min_tso_scheduler_failpoint, is_tso_min = true;); + if (is_tso_min) /// the min_tso query should fully run, otherwise throw errors here. { has_error = true; auto msg = fmt::format("threads are unavailable for the query {} ({} min_tso {}) {}, need {}, but used {} of the thread hard limit {}, {} active and {} waiting queries.", tso, tso == min_tso ? "is" : "is newer than", min_tso, isWaiting ? "from the waiting set" : "when directly schedule it", needed_threads, estimated_thread_usage, thread_hard_limit, active_set.size(), waiting_set.size()); diff --git a/dbms/src/Flash/Mpp/TaskStatus.cpp b/dbms/src/Flash/Mpp/TaskStatus.cpp index 423b768faea..c87ae2b8eb4 100644 --- a/dbms/src/Flash/Mpp/TaskStatus.cpp +++ b/dbms/src/Flash/Mpp/TaskStatus.cpp @@ -29,6 +29,8 @@ StringRef taskStatusToString(const TaskStatus & status) return "FINISHED"; case CANCELLED: return "CANCELLED"; + case FAILED: + return "FAILED"; default: throw Exception("Unknown TaskStatus"); } diff --git a/dbms/src/Flash/Mpp/TaskStatus.h b/dbms/src/Flash/Mpp/TaskStatus.h index 999e30790bf..0997c8adc52 100644 --- a/dbms/src/Flash/Mpp/TaskStatus.h +++ b/dbms/src/Flash/Mpp/TaskStatus.h @@ -24,6 +24,7 @@ enum TaskStatus RUNNING, FINISHED, CANCELLED, + FAILED, }; StringRef taskStatusToString(const TaskStatus & status); diff --git a/dbms/src/Flash/Mpp/Utils.cpp b/dbms/src/Flash/Mpp/Utils.cpp index 477c478eef7..21d89b3cd52 100644 --- a/dbms/src/Flash/Mpp/Utils.cpp +++ b/dbms/src/Flash/Mpp/Utils.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include @@ -27,4 +28,14 @@ mpp::MPPDataPacket getPacketWithError(String reason) return data; } +void trimStackTrace(String & message) +{ + auto stack_trace_pos = message.find("Stack trace"); + if (stack_trace_pos != String::npos) + { + message.resize(stack_trace_pos); + Poco::trimRightInPlace(message); + } +} + } // namespace DB diff --git a/dbms/src/Flash/Mpp/Utils.h b/dbms/src/Flash/Mpp/Utils.h index 67e2dc3f641..021dc4407d5 100644 --- a/dbms/src/Flash/Mpp/Utils.h +++ b/dbms/src/Flash/Mpp/Utils.h @@ -23,5 +23,6 @@ namespace DB { mpp::MPPDataPacket getPacketWithError(String reason); +void trimStackTrace(String & message); } // namespace DB diff --git a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp index 47ce2ee6ee6..706c17ed036 100644 --- a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp +++ b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp @@ -382,7 +382,7 @@ TEST_F(TestMPPTunnelBase, WriteError) } catch (Exception & e) { - GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed."); + GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, 0000_0001 meet error: grpc writes failed."); } } @@ -631,7 +631,7 @@ TEST_F(TestMPPTunnelBase, AsyncWriteError) } catch (Exception & e) { - GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed."); + GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, 0000_0001 meet error: grpc writes failed."); } } diff --git a/dbms/src/Flash/tests/WindowTestUtil.h b/dbms/src/Flash/tests/WindowTestUtil.h index 3f4cb7d595f..b7385380419 100644 --- a/dbms/src/Flash/tests/WindowTestUtil.h +++ b/dbms/src/Flash/tests/WindowTestUtil.h @@ -39,9 +39,9 @@ inline void mockExecuteProject(std::shared_ptr & mock_ mock_interpreter->executeProject(pipeline, final_project); } -inline void mockExecuteWindowOrder(std::shared_ptr & 
mock_interpreter, DAGPipeline & pipeline, const tipb::Sort & sort) +inline void mockExecuteWindowOrder(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const tipb::Sort & sort, uint64_t fine_grained_shuffle_stream_count) { - mock_interpreter->handleWindowOrder(pipeline, sort); + mock_interpreter->handleWindowOrder(pipeline, sort, ::DB::enableFineGrainedShuffle(fine_grained_shuffle_stream_count)); mock_interpreter->input_streams_vec[0] = pipeline.streams; NamesWithAliases final_project; for (const auto & column : (*mock_interpreter->analyzer).source_columns) @@ -51,16 +51,9 @@ inline void mockExecuteWindowOrder(std::shared_ptr & m mockExecuteProject(mock_interpreter, pipeline, final_project); } -inline void mockExecuteWindowOrder(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const String & sort_json) +inline void mockExecuteWindow(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const tipb::Window & window, uint64_t fine_grained_shuffle_stream_count) { - tipb::Sort sort; - ::google::protobuf::util::JsonStringToMessage(sort_json, &sort); - mockExecuteWindowOrder(mock_interpreter, pipeline, sort); -} - -inline void mockExecuteWindow(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const tipb::Window & window) -{ - mock_interpreter->handleWindow(pipeline, window); + mock_interpreter->handleWindow(pipeline, window, ::DB::enableFineGrainedShuffle(fine_grained_shuffle_stream_count)); mock_interpreter->input_streams_vec[0] = pipeline.streams; NamesWithAliases final_project; for (const auto & column : (*mock_interpreter->analyzer).source_columns) @@ -70,12 +63,5 @@ inline void mockExecuteWindow(std::shared_ptr & mock_i mockExecuteProject(mock_interpreter, pipeline, final_project); } -inline void mockExecuteWindow(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, std::string window_json_str) -{ - tipb::Window window; - google::protobuf::util::JsonStringToMessage(window_json_str, &window); - mockExecuteWindow(mock_interpreter, pipeline, window); -} - } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/bench_exchange.cpp b/dbms/src/Flash/tests/bench_exchange.cpp index fbb53bfd4a4..d6e3f3e825e 100644 --- a/dbms/src/Flash/tests/bench_exchange.cpp +++ b/dbms/src/Flash/tests/bench_exchange.cpp @@ -47,29 +47,46 @@ MockFixedRowsBlockInputStream::MockFixedRowsBlockInputStream(size_t total_rows_, , blocks(blocks_) {} -Block makeBlock(int row_num) +Block makeBlock(int row_num, bool skew) { - std::mt19937 mt(rd()); - std::uniform_int_distribution int64_dist; - std::uniform_int_distribution len_dist(10, 20); - std::uniform_int_distribution char_dist; - InferredDataVector> int64_vec; InferredDataVector> int64_vec2; - for (int i = 0; i < row_num; ++i) + InferredDataVector> string_vec; + + if (skew) { - int64_vec.emplace_back(int64_dist(mt)); - int64_vec2.emplace_back(int64_dist(mt)); + for (int i = 0; i < row_num; ++i) + { + int64_vec.emplace_back(100); + int64_vec2.emplace_back(100); + } + + for (int i = 0; i < row_num; ++i) + { + string_vec.push_back("abcdefg"); + } } - - InferredDataVector> string_vec; - for (int i = 0; i < row_num; ++i) + else { - int len = len_dist(mt); - String s; - for (int j = 0; j < len; ++j) - s.push_back(char_dist(mt)); - string_vec.push_back(std::move(s)); + std::mt19937 mt(rd()); + std::uniform_int_distribution int64_dist; + std::uniform_int_distribution len_dist(10, 20); + std::uniform_int_distribution char_dist; + + for (int i = 0; i < row_num; ++i) + { + int64_vec.emplace_back(int64_dist(mt)); + 
int64_vec2.emplace_back(int64_dist(mt)); + } + + for (int i = 0; i < row_num; ++i) + { + int len = len_dist(mt); + String s; + for (int j = 0; j < len; ++j) + s.push_back(char_dist(mt)); + string_vec.push_back(std::move(s)); + } } auto int64_data_type = makeDataType>(); @@ -82,11 +99,11 @@ Block makeBlock(int row_num) return Block({int64_column, string_column, int64_column2}); } -std::vector makeBlocks(int block_num, int row_num) +std::vector makeBlocks(int block_num, int row_num, bool skew) { std::vector blocks; for (int i = 0; i < block_num; ++i) - blocks.push_back(makeBlock(row_num)); + blocks.push_back(makeBlock(row_num, skew)); return blocks; } @@ -139,32 +156,10 @@ void printException(const Exception & e) << e.getStackTrace().toString() << std::endl; } -void sendPacket(const std::vector & packets, const PacketQueuePtr & queue, StopFlag & stop_flag) -{ - std::mt19937 mt(rd()); - std::uniform_int_distribution dist(0, packets.size() - 1); - - while (!stop_flag.load()) - { - int i = dist(mt); - queue->tryPush(packets[i], std::chrono::milliseconds(10)); - } - queue->finish(); -} - -void receivePacket(const PacketQueuePtr & queue) -{ - while (true) - { - PacketPtr packet; - if (!queue->pop(packet)) - break; - } -} - -ReceiverHelper::ReceiverHelper(int concurrency_, int source_num_) +ReceiverHelper::ReceiverHelper(int concurrency_, int source_num_, uint32_t fine_grained_shuffle_stream_count_) : concurrency(concurrency_) , source_num(source_num_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) { pb_exchange_receiver.set_tp(tipb::Hash); for (int i = 0; i < source_num; ++i) @@ -198,16 +193,21 @@ MockExchangeReceiverPtr ReceiverHelper::buildReceiver() source_num, concurrency, "mock_req_id", - "mock_exchange_receiver_id"); + "mock_exchange_receiver_id", + fine_grained_shuffle_stream_count); } std::vector ReceiverHelper::buildExchangeReceiverStream() { auto receiver = buildReceiver(); std::vector streams(concurrency); + // NOTE: check if need fine_grained_shuffle_stream_count for (int i = 0; i < concurrency; ++i) { - streams[i] = std::make_shared(receiver, "mock_req_id", "mock_executor_id" + std::to_string(i)); + streams[i] = std::make_shared(receiver, + "mock_req_id", + "mock_executor_id" + std::to_string(i), + /*stream_id=*/enableFineGrainedShuffle(fine_grained_shuffle_stream_count) ? 
i : 0); } return streams; } @@ -215,7 +215,7 @@ std::vector ReceiverHelper::buildExchangeReceiverStream() BlockInputStreamPtr ReceiverHelper::buildUnionStream() { auto streams = buildExchangeReceiverStream(); - return std::make_shared>(streams, nullptr, concurrency, /*req_id=*/""); + return std::make_shared>(streams, BlockInputStreams{}, concurrency, /*req_id=*/""); } void ReceiverHelper::finish() @@ -230,10 +230,14 @@ void ReceiverHelper::finish() SenderHelper::SenderHelper( int source_num_, int concurrency_, + uint32_t fine_grained_shuffle_stream_count_, + int64_t fine_grained_shuffle_batch_size_, const std::vector & queues_, const std::vector & fields) : source_num(source_num_) , concurrency(concurrency_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + , fine_grained_shuffle_batch_size(fine_grained_shuffle_batch_size_) , queues(queues_) { mpp::TaskMeta task_meta; @@ -277,20 +281,41 @@ BlockInputStreamPtr SenderHelper::buildUnionStream( for (int i = 0; i < concurrency; ++i) { BlockInputStreamPtr stream = std::make_shared(blocks, stop_flag); - std::unique_ptr response_writer( - new StreamingDAGResponseWriter( - tunnel_set, - {0, 1, 2}, - TiDB::TiDBCollators(3), - tipb::Hash, - -1, - -1, - true, - *dag_context)); - send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count)) + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + else + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } } - return std::make_shared>(send_streams, nullptr, concurrency, /*req_id=*/""); + return std::make_shared>(send_streams, BlockInputStreams{}, concurrency, /*req_id=*/""); } BlockInputStreamPtr SenderHelper::buildUnionStream(size_t total_rows, const std::vector & blocks) @@ -299,20 +324,41 @@ BlockInputStreamPtr SenderHelper::buildUnionStream(size_t total_rows, const std: for (int i = 0; i < concurrency; ++i) { BlockInputStreamPtr stream = std::make_shared(total_rows / concurrency, blocks); - std::unique_ptr response_writer( - new StreamingDAGResponseWriter( - tunnel_set, - {0, 1, 2}, - TiDB::TiDBCollators(3), - tipb::Hash, - -1, - -1, - true, - *dag_context)); - send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count)) + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + else + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + 
fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } } - return std::make_shared>(send_streams, nullptr, concurrency, /*req_id=*/""); + return std::make_shared>(send_streams, BlockInputStreams{}, concurrency, /*req_id=*/""); } void SenderHelper::finish() @@ -327,13 +373,12 @@ void SenderHelper::finish() void ExchangeBench::SetUp(const benchmark::State &) { - Poco::Logger::root().setLevel("error"); - DynamicThreadPool::global_instance = std::make_unique( /*fixed_thread_num=*/300, std::chrono::milliseconds(100000)); - input_blocks = makeBlocks(/*block_num=*/100, /*row_num=*/1024); + uniform_blocks = makeBlocks(/*block_num=*/100, /*row_num=*/1024); + skew_blocks = makeBlocks(/*block_num=*/100, /*row_num=*/1024, /*skew=*/true); try { @@ -348,7 +393,8 @@ void ExchangeBench::SetUp(const benchmark::State &) void ExchangeBench::TearDown(const benchmark::State &) { - input_blocks.clear(); + uniform_blocks.clear(); + skew_blocks.clear(); // NOTE: Must reset here, otherwise DynamicThreadPool::fixedWork() may core because metrics already destroyed. DynamicThreadPool::global_instance.reset(); } @@ -383,25 +429,38 @@ try const int concurrency = state.range(0); const int source_num = state.range(1); const int total_rows = state.range(2); + const int fine_grained_shuffle_stream_count = state.range(3); + const int fine_grained_shuffle_batch_size = state.range(4); Context context = TiFlashTestEnv::getContext(); for (auto _ : state) { - std::shared_ptr receiver_helper = std::make_shared(concurrency, source_num); + std::shared_ptr receiver_helper = std::make_shared(concurrency, source_num, fine_grained_shuffle_stream_count); BlockInputStreamPtr receiver_stream = receiver_helper->buildUnionStream(); std::shared_ptr sender_helper = std::make_shared(source_num, concurrency, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size, receiver_helper->queues, receiver_helper->fields); - BlockInputStreamPtr sender_stream = sender_helper->buildUnionStream(total_rows, input_blocks); + BlockInputStreamPtr sender_stream = sender_helper->buildUnionStream(total_rows, uniform_blocks); runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream); } } CATCH BENCHMARK_REGISTER_F(ExchangeBench, basic_send_receive) - ->Args({8, 1, 1024 * 1000}); + ->Args({8, 1, 1024 * 1000, 0, 4096}) + ->Args({8, 1, 1024 * 1000, 4, 4096}) + ->Args({8, 1, 1024 * 1000, 8, 4096}) + ->Args({8, 1, 1024 * 1000, 16, 4096}) + ->Args({8, 1, 1024 * 1000, 32, 4096}) + ->Args({8, 1, 1024 * 1000, 8, 1}) + ->Args({8, 1, 1024 * 1000, 8, 1000}) + ->Args({8, 1, 1024 * 1000, 8, 10000}) + ->Args({8, 1, 1024 * 1000, 8, 100000}); + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/bench_exchange.h b/dbms/src/Flash/tests/bench_exchange.h index 6b09e319613..d8300d45740 100644 --- a/dbms/src/Flash/tests/bench_exchange.h +++ b/dbms/src/Flash/tests/bench_exchange.h @@ -69,7 +69,9 @@ struct MockReceiverContext : queue(queue_) {} - void initialize() const {} + void initialize() const + { + } bool read(PacketPtr & packet [[maybe_unused]]) const { @@ -105,7 +107,8 @@ struct MockReceiverContext const std::vector & field_types_) : queues(queues_) , field_types(field_types_) - {} + { + } void fillSchema(DAGSchema & schema) const { @@ -220,8 +223,8 @@ struct MockFixedRowsBlockInputStream : public IProfilingBlockInputStream } }; -Block makeBlock(int row_num); -std::vector makeBlocks(int block_num, int row_num); +Block makeBlock(int row_num, bool skew 
= false); +std::vector makeBlocks(int block_num, int row_num, bool skew = false); mpp::MPPDataPacket makePacket(ChunkCodecStream & codec, int row_num); std::vector makePackets(ChunkCodecStream & codec, int packet_num, int row_num); std::vector makePacketQueues(int source_num, int queue_size); @@ -234,17 +237,17 @@ struct ReceiverHelper { const int concurrency; const int source_num; + const uint32_t fine_grained_shuffle_stream_count; tipb::ExchangeReceiver pb_exchange_receiver; std::vector fields; mpp::TaskMeta task_meta; std::vector queues; std::shared_ptr join_ptr; - explicit ReceiverHelper(int concurrency_, int source_num_); + explicit ReceiverHelper(int concurrency_, int source_num_, uint32_t fine_grained_shuffle_stream_count_); MockExchangeReceiverPtr buildReceiver(); std::vector buildExchangeReceiverStream(); BlockInputStreamPtr buildUnionStream(); - BlockInputStreamPtr buildUnionStreamWithHashJoinBuildStream(); void finish(); }; @@ -252,6 +255,8 @@ struct SenderHelper { const int source_num; const int concurrency; + const uint32_t fine_grained_shuffle_stream_count; + const int64_t fine_grained_shuffle_batch_size; std::vector queues; std::vector mock_writers; @@ -262,6 +267,8 @@ struct SenderHelper SenderHelper( int source_num_, int concurrency_, + uint32_t fine_grained_shuffle_stream_count_, + int64_t fine_grained_shuffle_batch_size_, const std::vector & queues_, const std::vector & fields); @@ -283,7 +290,8 @@ class ExchangeBench : public benchmark::Fixture std::shared_ptr & sender_helper, BlockInputStreamPtr sender_stream); - std::vector input_blocks; + std::vector uniform_blocks; + std::vector skew_blocks; }; diff --git a/dbms/src/Flash/tests/bench_window.cpp b/dbms/src/Flash/tests/bench_window.cpp index da9df20fdf3..75dc53b065b 100644 --- a/dbms/src/Flash/tests/bench_window.cpp +++ b/dbms/src/Flash/tests/bench_window.cpp @@ -24,10 +24,14 @@ class WindowFunctionBench : public ExchangeBench public: void SetUp(const benchmark::State & state) override { - // build tipb::Window and tipb::Sort. + // Using DAGRequestBuilder to build tipb::Window and tipb::Sort. 
// select row_number() over w1 from t1 window w1 as (partition by c1, c2, c3 order by c1, c2, c3); ExchangeBench::SetUp(state); - MockColumnInfos columns{ + } + + static void setupPB(uint64_t fine_grained_shuffle_stream_count, tipb::Window & window, tipb::Sort & sort) + { + MockColumnInfoVec columns{ {"c1", TiDB::TP::TypeLongLong}, {"c2", TiDB::TP::TypeString}, {"c3", TiDB::TP::TypeLongLong}, @@ -36,11 +40,12 @@ class WindowFunctionBench : public ExchangeBench DAGRequestBuilder builder(executor_index); builder .mockTable("test", "t1", columns) - .sort({{"c1", false}, {"c2", false}, {"c3", false}}, true) + .sort({{"c1", false}, {"c2", false}, {"c3", false}}, true, fine_grained_shuffle_stream_count) .window(RowNumber(), {{"c1", false}, {"c2", false}, {"c3", false}}, {{"c1", false}, {"c2", false}, {"c3", false}}, - buildDefaultRowsFrame()); + buildDefaultRowsFrame(), + fine_grained_shuffle_stream_count); tipb::DAGRequest req; MPPInfo mpp_info(0, -1, -1, {}, std::unordered_map>{}); builder.getRoot()->toTiPBExecutor(req.mutable_root_executor(), /*collator_id=*/0, mpp_info, TiFlashTestEnv::getContext()); @@ -50,13 +55,17 @@ class WindowFunctionBench : public ExchangeBench sort = window.child().sort(); } - void prepareWindowStream(Context & context, int concurrency, int source_num, int total_rows, const std::vector & blocks, BlockInputStreamPtr & sender_stream, BlockInputStreamPtr & receiver_stream, std::shared_ptr & sender_helper, std::shared_ptr & receiver_helper) const + static void prepareWindowStream(Context & context, int concurrency, int source_num, int total_rows, uint32_t fine_grained_shuffle_stream_count, uint64_t fine_grained_shuffle_batch_size, const std::vector & blocks, BlockInputStreamPtr & sender_stream, BlockInputStreamPtr & receiver_stream, std::shared_ptr & sender_helper, std::shared_ptr & receiver_helper, bool build_window = true) { + tipb::Window window; + tipb::Sort sort; + setupPB(fine_grained_shuffle_stream_count, window, sort); + DAGPipeline pipeline; - receiver_helper = std::make_shared(concurrency, source_num); + receiver_helper = std::make_shared(concurrency, source_num, fine_grained_shuffle_stream_count); pipeline.streams = receiver_helper->buildExchangeReceiverStream(); - sender_helper = std::make_shared(source_num, concurrency, receiver_helper->queues, receiver_helper->fields); + sender_helper = std::make_shared(source_num, concurrency, fine_grained_shuffle_stream_count, fine_grained_shuffle_batch_size, receiver_helper->queues, receiver_helper->fields); sender_stream = sender_helper->buildUnionStream(total_rows, blocks); context.setDAGContext(sender_helper->dag_context.get()); @@ -66,16 +75,16 @@ class WindowFunctionBench : public ExchangeBench NameAndTypePair("c3", makeNullable(std::make_shared()))}; auto mock_interpreter = mockInterpreter(context, source_columns, concurrency); mock_interpreter->input_streams_vec.push_back(pipeline.streams); - mockExecuteWindowOrder(mock_interpreter, pipeline, sort); - mockExecuteWindow(mock_interpreter, pipeline, window); + mockExecuteWindowOrder(mock_interpreter, pipeline, sort, fine_grained_shuffle_stream_count); + if (build_window) + { + mockExecuteWindow(mock_interpreter, pipeline, window, fine_grained_shuffle_stream_count); + } pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, 8192, 0, "mock_executor_id_squashing"); }); - receiver_stream = std::make_shared>(pipeline.streams, nullptr, concurrency, /*req_id=*/""); + receiver_stream = std::make_shared>(pipeline.streams, BlockInputStreams{}, concurrency, 
/*req_id=*/""); } - - tipb::Window window; - tipb::Sort sort; }; BENCHMARK_DEFINE_F(WindowFunctionBench, basic_row_number) @@ -85,8 +94,15 @@ try const int concurrency = state.range(0); const int source_num = state.range(1); const int total_rows = state.range(2); + const int fine_grained_shuffle_stream_count = state.range(3); + const int fine_grained_shuffle_batch_size = state.range(4); + const bool skew = state.range(5); Context context = TiFlashTestEnv::getContext(); + std::vector * blocks = &uniform_blocks; + if (skew) + blocks = &skew_blocks; + for (auto _ : state) { std::shared_ptr sender_helper; @@ -94,14 +110,58 @@ try BlockInputStreamPtr sender_stream; BlockInputStreamPtr receiver_stream; - prepareWindowStream(context, concurrency, source_num, total_rows, input_blocks, sender_stream, receiver_stream, sender_helper, receiver_helper); + prepareWindowStream(context, concurrency, source_num, total_rows, fine_grained_shuffle_stream_count, fine_grained_shuffle_batch_size, *blocks, sender_stream, receiver_stream, sender_helper, receiver_helper); runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream); } } CATCH BENCHMARK_REGISTER_F(WindowFunctionBench, basic_row_number) - ->Args({8, 1, 1024 * 1000}); + ->Args({8, 1, 1024 * 1000, 0, 4096, false}) // Test fine_grained_shuffle_stream_count. + ->Args({8, 1, 1024 * 1000, 4, 4096, false}) + ->Args({8, 1, 1024 * 1000, 8, 4096, false}) + ->Args({8, 1, 1024 * 1000, 16, 4096, false}) + ->Args({8, 1, 1024 * 1000, 32, 4096, false}) + ->Args({8, 1, 1024 * 1000, 8, 1, false}) // Test fine_grained_shuffle_batch_size. + ->Args({8, 1, 1024 * 1000, 8, 1000, false}) + ->Args({8, 1, 1024 * 1000, 8, 10000, false}) + ->Args({8, 1, 1024 * 1000, 8, 100000, false}) + ->Args({8, 1, 1024 * 1000, 0, 4096, true}) // Test skew dataset. + ->Args({8, 1, 1024 * 1000, 4, 4096, true}) + ->Args({8, 1, 1024 * 1000, 8, 4096, true}) + ->Args({8, 1, 1024 * 1000, 16, 4096, true}); + +BENCHMARK_DEFINE_F(WindowFunctionBench, partial_sort_skew_dataset) +(benchmark::State & state) +try +{ + const int concurrency = state.range(0); + const int source_num = state.range(1); + const int total_rows = state.range(2); + const int fine_grained_shuffle_stream_count = state.range(3); + const int fine_grained_shuffle_batch_size = state.range(4); + Context context = TiFlashTestEnv::getContext(); + std::vector * blocks = &skew_blocks; + + for (auto _ : state) + { + std::shared_ptr sender_helper; + std::shared_ptr receiver_helper; + BlockInputStreamPtr sender_stream; + BlockInputStreamPtr receiver_stream; + + // Only build partial sort. + prepareWindowStream(context, concurrency, source_num, total_rows, fine_grained_shuffle_stream_count, fine_grained_shuffle_batch_size, *blocks, sender_stream, receiver_stream, sender_helper, receiver_helper, /*build_window=*/false); + + runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream); + } +} +CATCH +BENCHMARK_REGISTER_F(WindowFunctionBench, partial_sort_skew_dataset) + ->Args({1, 1, 1024 * 10000, 0, 4096}) // Test how much multiple-thread improves performance for partial sort. 
+ ->Args({2, 1, 1024 * 10000, 0, 4096}) + ->Args({4, 1, 1024 * 10000, 0, 4096}) + ->Args({8, 1, 1024 * 10000, 0, 4096}); } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_executor.cpp b/dbms/src/Flash/tests/gtest_executor.cpp index 64c60f14bb6..d0e7b7e6c67 100644 --- a/dbms/src/Flash/tests/gtest_executor.cpp +++ b/dbms/src/Flash/tests/gtest_executor.cpp @@ -69,18 +69,18 @@ try .filter(eq(col("s1"), col("s2"))) .build(context); { - executeStreams(request, - {toNullableVec({"banana"}), - toNullableVec({"banana"})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana"}), + toNullableVec({"banana"})})); } request = context.receive("exchange1") .filter(eq(col("s1"), col("s2"))) .build(context); { - executeStreams(request, - {toNullableVec({"banana"}), - toNullableVec({"banana"})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana"}), + toNullableVec({"banana"})})); } } CATCH @@ -99,25 +99,23 @@ try " table_scan_0 | {<0, String>, <1, String>}\n" " table_scan_1 | {<0, String>, <1, String>}\n"; ASSERT_DAGREQUEST_EQAUL(expected, request); - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); - - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 5); - - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request, 5), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); } request = context .scan("test_db", "l_table") @@ -132,10 +130,9 @@ try " table_scan_0 | {<0, String>, <1, String>}\n" " table_scan_1 | {<0, String>, <1, String>}\n"; ASSERT_DAGREQUEST_EQAUL(expected, request); - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); } request = context @@ -149,18 +146,16 @@ try " table_scan_0 | {<0, String>, <1, String>}\n" " table_scan_1 | {<0, String>, <1, String>}\n"; ASSERT_DAGREQUEST_EQAUL(expected, request); - executeStreams(request, - {toNullableVec({"banana", "banana", "banana", "banana"}), - toNullableVec({"apple", "apple", "apple", "banana"}), - toNullableVec({"banana", "banana", "banana", {}}), - toNullableVec({"apple", "apple", "apple", {}})}, - 2); - executeStreams(request, - {toNullableVec({"banana", "banana", "banana", "banana"}), - toNullableVec({"apple", "apple", "apple", "banana"}), - toNullableVec({"banana", "banana", "banana", {}}), - toNullableVec({"apple", "apple", "apple", {}})}, - 3); + 
ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana", "banana", "banana"}), + toNullableVec({"apple", "apple", "apple", "banana"}), + toNullableVec({"banana", "banana", "banana", {}}), + toNullableVec({"apple", "apple", "apple", {}})})); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 3), + createColumns({toNullableVec({"banana", "banana", "banana", "banana"}), + toNullableVec({"apple", "apple", "apple", "banana"}), + toNullableVec({"banana", "banana", "banana", {}}), + toNullableVec({"apple", "apple", "apple", {}})})); } } CATCH @@ -179,25 +174,23 @@ try " exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>}\n" " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; ASSERT_DAGREQUEST_EQAUL(expected, request); - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); - - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 5); - - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request, 5), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); } } CATCH @@ -216,15 +209,14 @@ try " table_scan_0 | {<0, String>, <1, String>}\n" " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; ASSERT_DAGREQUEST_EQAUL(expected, request); - executeStreams(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); } } CATCH } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index ba7d8fd15ee..75a0857465e 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -31,8 +31,8 @@ class InterpreterExecuteTest : public DB::tests::ExecutorTest context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); context.addExchangeRelationSchema("sender_1", {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); - context.addExchangeRelationSchema("sender_l", {{"l_a", TiDB::TP::TypeString}, 
{"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); - context.addExchangeRelationSchema("sender_r", {{"r_a", TiDB::TP::TypeString}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addExchangeRelationSchema("sender_l", {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addExchangeRelationSchema("sender_r", {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); } }; @@ -96,22 +96,19 @@ try auto request = context.scan("test_db", "test_table_1") .project({"s1", "s2", "s3"}) .project({"s1", "s2"}) - .project("s1") + .project({"s1"}) .build(context); { String expected = R"( Union: Expression x 10: Expression: - Expression: - Expression: - Expression: - Expression: + Expression: + Expression: + Expression: + Expression: Expression: - Expression: - Expression: - Expression: - MockTableScan)"; + MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -125,16 +122,14 @@ Union: Union: Expression x 10: Expression: - Expression: - SharedQuery: - Expression: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - Expression: - Expression: - Expression: - MockTableScan)"; + SharedQuery: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -150,22 +145,18 @@ Union: Union: Expression x 10: Expression: - Expression: - Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - Expression x 10: - Expression: - Expression: - SharedQuery: - Expression: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - Expression: - Expression: - Expression: - MockTableScan)"; + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + SharedQuery: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -188,97 +179,48 @@ Union: Limit x 10, limit = 10 Expression: Expression: - Expression: - Expression: - Expression: - Filter: - Expression: - Expression: - Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - Expression x 10: - Expression: - Expression: - SharedQuery: - Expression: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - Expression: - Expression: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - // Join Source. 
- DAGRequestBuilder table1 = context.scan("test_db", "r_table"); - DAGRequestBuilder table2 = context.scan("test_db", "l_table"); - DAGRequestBuilder table3 = context.scan("test_db", "r_table"); - DAGRequestBuilder table4 = context.scan("test_db", "l_table"); - - request = table1.join( - table2.join( - table3.join(table4, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .build(context); - { - String expected = R"( -CreatingSets - Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - MockTableScan - Union x 2: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - Expression: - HashJoinProbe: Expression: - MockTableScan - Union: - Expression x 10: - Expression: - HashJoinProbe: - Expression: - MockTableScan)"; + Expression: + Filter: + Expression: + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + SharedQuery: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } request = context.receive("sender_1") .project({"s1", "s2", "s3"}) .project({"s1", "s2"}) - .project("s1") + .project({"s1"}) .build(context); { String expected = R"( Union: Expression x 10: Expression: - Expression: - Expression: - Expression: - Expression: + Expression: + Expression: + Expression: + Expression: Expression: - Expression: - Expression: - Expression: - MockExchangeReceiver)"; + MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } request = context.receive("sender_1") .project({"s1", "s2", "s3"}) .project({"s1", "s2"}) - .project("s1") + .project({"s1"}) .exchangeSender(tipb::Broadcast) .build(context); { @@ -287,35 +229,230 @@ Union: MockExchangeSender x 10 Expression: Expression: - Expression: - Expression: - Expression: - Expression: + Expression: + Expression: + Expression: + Expression: Expression: + MockExchangeReceiver)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } +} +CATCH + +TEST_F(InterpreterExecuteTest, Window) +try +{ + auto request = context + .scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "RowNumber()"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .sort({{"s1", true}, {"s2", false}}, true) + .project({"s1", "s2", "s3"}) + 
.window(RowNumber(), {"s1", true}, {"s1", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "s3", "RowNumber()"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Union: + Expression x 10: Expression: - Expression: + SharedQuery: Expression: - MockExchangeReceiver)"; + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } +} +CATCH - // only join + ExchangeReceiver - DAGRequestBuilder receiver1 = context.receive("sender_l"); - DAGRequestBuilder receiver2 = context.receive("sender_r"); - DAGRequestBuilder receiver3 = context.receive("sender_l"); - DAGRequestBuilder receiver4 = context.receive("sender_r"); - - request = receiver1.join( - receiver2.join( - receiver3.join(receiver4, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) +TEST_F(InterpreterExecuteTest, FineGrainedShuffle) +try +{ + // fine-grained shuffle is enabled. + const uint64_t enable = 8; + const uint64_t disable = 0; + auto request = context + .receive("sender_1", enable) + .sort({{"s1", true}, {"s2", false}}, true, enable) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame(), enable) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Window: , function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting: , limit = 0 + PartialSorting: : limit = 0 + Expression: + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + auto topn_request = context + .receive("sender_1") + .topN("s2", false, 10) + .build(context); + String topn_expected = R"( +Union: + SharedQuery x 10: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(topn_expected, topn_request, 10); + + // fine-grained shuffle is disabled. + request = context + .receive("sender_1", disable) + .sort({{"s1", true}, {"s2", false}}, true, disable) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame(), disable) .build(context); { + String expected = R"( +Union: + Expression x 10: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + topn_request = context + .receive("sender_1") + .topN("s2", false, 10) + .build(context); + ASSERT_BLOCKINPUTSTREAM_EQAUL(topn_expected, topn_request, 10); +} +CATCH + +TEST_F(InterpreterExecuteTest, Join) +try +{ + // TODO: Find a way to write the request easier. + { + // Join Source. 
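In the `FineGrainedShuffle` expectations above, enabling the feature (`enable = 8`) removes the `Union`/`SharedQuery` wrapper around `MergeSorting`: each receiver stream owns a disjoint hash partition of the window's partition key, so it can sort and apply the window function independently. A conceptual sketch of that partitioning step (the `std::hash` choice and names are illustrative; TiFlash uses its own hash and block layout):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Rows are hashed on the window partition key into stream_count buckets, so
// each downstream stream sorts and windows its bucket on its own.
std::vector<std::vector<std::string>> fineGrainedShuffle(
    const std::vector<std::string> & partition_keys, uint64_t stream_count)
{
    std::vector<std::vector<std::string>> buckets(stream_count);
    for (const auto & key : partition_keys)
        buckets[std::hash<std::string>{}(key) % stream_count].push_back(key);
    return buckets;
}

int main()
{
    auto buckets = fineGrainedShuffle({"s1-a", "s1-b", "s1-a", "s1-c"}, 2);
    for (size_t i = 0; i < buckets.size(); ++i)
        std::cout << "stream " << i << ": " << buckets[i].size() << " rows\n";
}
```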
+ DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + DAGRequestBuilder table3 = context.scan("test_db", "r_table"); + DAGRequestBuilder table4 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2.join( + table3.join(table4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .build(context); + + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockTableScan + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + MockTableScan + Union: + Expression x 10: + Expression: + HashJoinProbe: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + { + // only join + ExchangeReceiver + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + DAGRequestBuilder receiver3 = context.receive("sender_l"); + DAGRequestBuilder receiver4 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2.join( + receiver3.join(receiver4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .build(context); + String expected = R"( CreatingSets Union: @@ -340,24 +477,25 @@ CreatingSets ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } - // join + receiver + sender - // TODO: Find a way to write the request easier. - DAGRequestBuilder receiver5 = context.receive("sender_l"); - DAGRequestBuilder receiver6 = context.receive("sender_r"); - DAGRequestBuilder receiver7 = context.receive("sender_l"); - DAGRequestBuilder receiver8 = context.receive("sender_r"); - request = receiver5.join( - receiver6.join( - receiver7.join(receiver8, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .exchangeSender(tipb::PassThrough) - .build(context); { + // join + receiver + sender + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + DAGRequestBuilder receiver3 = context.receive("sender_l"); + DAGRequestBuilder receiver4 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2.join( + receiver3.join(receiver4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .exchangeSender(tipb::PassThrough) + .build(context); + String expected = R"( CreatingSets Union: @@ -385,85 +523,111 @@ CreatingSets } CATCH -TEST_F(InterpreterExecuteTest, Window) +TEST_F(InterpreterExecuteTest, JoinThenAgg) try { - auto request = context - .scan("test_db", "test_table") - .sort({{"s1", true}, {"s2", false}}, true) - .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) - .build(context); { + // Left Join. 
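Every join plan above pairs a `HashJoinBuildBlockInputStream` under `CreatingSets` (build side) with a `HashJoinProbe` over the other input. Stripped of streams and blocks, the build/probe split of a left join reduces to roughly this sketch:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Build phase fills a hash table (HashJoinBuildBlockInputStream); probe phase
// looks keys up (HashJoinProbe); unmatched probe rows survive a left join.
int main()
{
    std::vector<std::pair<std::string, int>> build = {{"k1", 1}, {"k2", 2}};
    std::vector<std::string> probe = {"k1", "k3"};

    std::unordered_multimap<std::string, int> hash_table;
    for (const auto & [k, v] : build) // build phase
        hash_table.emplace(k, v);

    for (const auto & k : probe) // probe phase
    {
        auto [begin, end] = hash_table.equal_range(k);
        if (begin == end)
            std::cout << k << " -> NULL\n"; // left join keeps unmatched rows
        for (auto it = begin; it != end; ++it)
            std::cout << k << " -> " << it->second << '\n';
    }
}
```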
+ DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2, + {col("join_c")}, + ASTTableJoin::Kind::Left) + .aggregation({Max(col("r_a"))}, {col("join_c")}) + .build(context); String expected = R"( -Union: - Expression x 10: - SharedQuery: - Expression: - Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} - Expression: - MergeSorting, limit = 0 - Union: - PartialSorting x 10: limit = 0 - Expression: - MockTableScan)"; +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + HashJoinProbe: + Expression: + MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } - request = context.scan("test_db", "test_table") - .sort({{"s1", true}, {"s2", false}}, true) - .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) - .project({"s1", "s2", "RowNumber()"}) - .build(context); { + // Right Join + DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2, + {col("join_c")}, + ASTTableJoin::Kind::Right) + .aggregation({Max(col("r_a"))}, {col("join_c")}) + .build(context); String expected = R"( -Union: - Expression x 10: - Expression: - Expression: +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Right + Expression: Expression: - SharedQuery: - Expression: - Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + MockTableScan + Union: + Expression x 10: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + HashJoinProbe: + Expression: Expression: - MergeSorting, limit = 0 - Union: - PartialSorting x 10: limit = 0 - Expression: - MockTableScan)"; + MockTableScan + Expression x 10: + NonJoined: )"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } - request = context.scan("test_db", "test_table_1") - .sort({{"s1", true}, {"s2", false}}, true) - .project({"s1", "s2", "s3"}) - .window(RowNumber(), {"s1", true}, {"s1", false}, buildDefaultRowsFrame()) - .project({"s1", "s2", "s3", "RowNumber()"}) - .build(context); { + // Right join + receiver + sender + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2, + {col("join_c")}, + ASTTableJoin::Kind::Right) + .aggregation({Sum(col("r_a"))}, {col("join_c")}) + .exchangeSender(tipb::PassThrough) + .limit(10) + .build(context); String expected = R"( -Union: - Expression x 10: - Expression: - Expression: +CreatingSets + Union: + HashJoinBuildBlockInputStream x 20: , join_kind = Right + Expression: Expression: - SharedQuery: - Expression: - Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} - Union: - Expression x 10: - Expression: - Expression: - SharedQuery: - Expression: - MergeSorting, limit = 0 - Union: - PartialSorting x 10: limit = 0 - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + MockExchangeReceiver + Union: + MockExchangeSender x 20 + SharedQuery: + Limit, limit = 10 + Union: + Limit x 20, limit = 10 + Expression: + Expression: + SharedQuery: + 
ParallelAggregating, max_threads: 20, final: true + Expression x 20: + HashJoinProbe: + Expression: + Expression: + MockExchangeReceiver + Expression x 20: + NonJoined: )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 20); } } CATCH } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_limit_executor.cpp b/dbms/src/Flash/tests/gtest_limit_executor.cpp new file mode 100644 index 00000000000..47482540b39 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_limit_executor.cpp @@ -0,0 +1,77 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorLimitTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColDataType = std::optional::FieldType>; + using ColumnWithData = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_name}, + {{col_name, TiDB::TP::TypeString}}, + {toNullableVec(col_name, col0)}); + } + + std::shared_ptr buildDAGRequest(size_t limit_num) + { + return context.scan(db_name, table_name).limit(limit_num).build(context); + } + + /// Prepare some names + const String db_name{"test_db"}; + const String table_name{"projection_test_table"}; + const String col_name{"limit_col"}; + const ColumnWithData col0{"col0-0", {}, "col0-2", "col0-3", {}, "col0-5", "col0-6", "col0-7"}; +}; + +TEST_F(ExecutorLimitTestRunner, Limit) +try +{ + std::shared_ptr request; + ColumnsWithTypeAndName expect_cols; + + /// Check limit result with various parameters + const size_t col_data_num = col0.size(); + for (size_t limit_num = 0; limit_num <= col_data_num + 3; ++limit_num) + { + if (limit_num == col_data_num + 3) + limit_num = INT_MAX; + request = buildDAGRequest(limit_num); + + if (limit_num == 0) + expect_cols = {}; + else if (limit_num > col_data_num) + expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.end()))}; + else + expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.begin() + limit_num))}; + + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols); + } +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_projection_executor.cpp b/dbms/src/Flash/tests/gtest_projection_executor.cpp new file mode 100644 index 00000000000..8443dedeb49 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_projection_executor.cpp @@ -0,0 +1,224 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
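The new `gtest_limit_executor.cpp` sweeps `limit_num` from 0 past the column size (ending at `INT_MAX`) and derives the expected slice straight from the source data: limit 0 yields no rows, a limit at or above the row count yields everything. The rule it encodes, as a standalone check:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Expected result of LIMIT n over a single source block: the first
// min(n, rows) rows, in source order.
std::vector<int> applyLimit(const std::vector<int> & rows, size_t n)
{
    return {rows.begin(), rows.begin() + std::min(n, rows.size())};
}

int main()
{
    std::vector<int> rows = {1, 2, 3};
    assert(applyLimit(rows, 0).empty());       // limit 0 -> no rows
    assert(applyLimit(rows, 2).size() == 2);   // ordinary prefix
    assert(applyLimit(rows, 100).size() == 3); // capped at the row count
}
```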
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorProjectionTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColDataString = std::vector::FieldType>>; + using ColDataInt32 = std::vector::FieldType>>; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_name}, + {{col_names[0], TiDB::TP::TypeString}, + {col_names[1], TiDB::TP::TypeString}, + {col_names[2], TiDB::TP::TypeString}, + {col_names[3], TiDB::TP::TypeLong}, + {col_names[4], TiDB::TP::TypeLong}}, + {toNullableVec(col_names[0], col0), + toNullableVec(col_names[1], col1), + toNullableVec(col_names[2], col2), + toNullableVec(col_names[3], col3), + toNullableVec(col_names[4], col4)}); + } + + template + std::shared_ptr buildDAGRequest(T param) + { + return context.scan(db_name, table_name).project(param).build(context); + }; + + void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) + { + for (size_t i = 1; i < 10; i += 2) + { + ASSERT_COLUMNS_EQ_UR(executeStreams(request, i), expect_columns); + } + } + + /// Prepare column data + const ColDataString col0{"col0-0", "col0-1", "", "col0-2", {}, "col0-3", ""}; + const ColDataString col1{"col1-0", {}, "", "col1-1", "", "col1-2", "col1-3"}; + const ColDataString col2{"", "col2-0", "col2-1", {}, "col2-3", {}, "col2-4"}; + const ColDataInt32 col3{1, {}, 0, -111111, {}, 0, 9999}; + + /** Each value in col4 should be different from each other so that topn + * could sort the columns into an unique result, or multi-results could + * be right. + */ + const ColDataInt32 col4{0, 5, -123, -234, {}, 24353, 9999}; + + /// Results after sorted by col4 + const ColDataString col0_sorted_asc{{}, "col0-2", "", "col0-0", "col0-1", "", "col0-3"}; + const ColDataString col1_sorted_asc{"", "col1-1", "", "col1-0", {}, "col1-3", "col1-2"}; + const ColDataString col2_sorted_asc{"col2-3", {}, "col2-1", "", "col2-0", "col2-4", {}}; + const ColDataInt32 col3_sorted_asc{{}, -111111, 0, 1, {}, 9999, 0}; + const ColDataInt32 col4_sorted_asc{{}, -234, -123, 0, 5, 9999, 24353}; + + /// Prepare some names + std::vector col_names{"col0", "col1", "col2", "col3", "col4"}; + const String db_name{"test_db"}; + const String table_name{"projection_test_table"}; +}; + +TEST_F(ExecutorProjectionTestRunner, Projection) +try +{ + /// Check single column + auto request = buildDAGRequest({col_names[4]}); + executeWithConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[4]}); + executeWithConcurrency(request, + { + toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + }); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[1], col_names[4]}); + executeWithConcurrency(request, + {toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[1], col1_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check duplicate columns + request = buildDAGRequest({col_names[4], col_names[4], col_names[4]}); + executeWithConcurrency(request, + {toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + + { + /// Check large number of columns + const size_t col_num = 100; + MockColumnNameVec 
projection_input; + ColumnsWithTypeAndName columns; + auto expect_column = toNullableVec(col_names[4], col4_sorted_asc); + + for (size_t i = 0; i < col_num; ++i) + { + projection_input.push_back(col_names[4]); + columns.push_back(expect_column); + } + + request = buildDAGRequest(projection_input); + executeWithConcurrency(request, columns); + } +} +CATCH + +TEST_F(ExecutorProjectionTestRunner, ProjectionFunction) +try +{ + std::shared_ptr request; + + /// Test "equal" function + + /// Data type: TypeString + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({eq(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "greater" function + + /// Data type: TypeString + request = buildDAGRequest({gt(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[1]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({gt(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[4]), col(col_names[3])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "and" function + + /// Data type: TypeString + request = buildDAGRequest({And(col(col_names[0]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({And(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({0, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({And(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Test "not" function + + /// Data type: TypeString + request = buildDAGRequest({NOT(col(col_names[0])), NOT(col(col_names[1])), NOT(col(col_names[2])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, {}, 1, 1}), + toNullableVec({1, {}, 1, 1, 1, 1, {}}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({NOT(col(col_names[3])), NOT(col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), + toNullableVec({{}, 0, 0, 1, 0, 0, 0}), + 
toNullableVec(col_names[4], col4_sorted_asc)}); + + /// TODO more functions... +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_topn_executor.cpp b/dbms/src/Flash/tests/gtest_topn_executor.cpp new file mode 100644 index 00000000000..597ac9f279a --- /dev/null +++ b/dbms/src/Flash/tests/gtest_topn_executor.cpp @@ -0,0 +1,221 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorTopNTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColStringType = std::optional::FieldType>; + using ColInt32Type = std::optional::FieldType>; + using ColumnWithString = std::vector; + using ColumnWithInt32 = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_single_name}, + {{single_col_name, TiDB::TP::TypeString}}, + {toNullableVec(single_col_name, col0)}); + + context.addMockTable({db_name, table_name}, + {{col_name[0], TiDB::TP::TypeLong}, + {col_name[1], TiDB::TP::TypeString}, + {col_name[2], TiDB::TP::TypeString}, + {col_name[3], TiDB::TP::TypeLong}}, + {toNullableVec(col_name[0], col_age), + toNullableVec(col_name[1], col_gender), + toNullableVec(col_name[2], col_country), + toNullableVec(col_name[3], c0l_salary)}); + } + + std::shared_ptr buildDAGRequest(const String & table_name, const String & col_name, bool is_desc, int limit_num) + { + return context.scan(db_name, table_name).topN(col_name, is_desc, limit_num).build(context); + } + + std::shared_ptr buildDAGRequest(const String & table_name, MockOrderByItemVec order_by_items, int limit, MockAstVec func_proj_ast = {}, MockColumnNameVec out_proj_ast = {}) + { + if (func_proj_ast.size() == 0) + return context.scan(db_name, table_name).topN(order_by_items, limit).build(context); + else + return context.scan(db_name, table_name).project(func_proj_ast).topN(order_by_items, limit).project(out_proj_ast).build(context); + } + + /// Prepare some names + const String db_name{"test_db"}; + + const String table_single_name{"topn_single_table"}; /// For single column test + const String single_col_name{"single_col"}; + ColumnWithString col0{"col0-0", "col0-1", "col0-2", {}, "col0-4", {}, "col0-6", "col0-7"}; + + const String table_name{"clerk"}; + const std::vector col_name{"age", "gender", "country", "salary"}; + ColumnWithInt32 col_age{{}, 27, 32, 36, {}, 34}; + ColumnWithString col_gender{"female", "female", "male", "female", "male", "male"}; + ColumnWithString col_country{"korea", "usa", "usa", "china", "china", "china"}; + ColumnWithInt32 c0l_salary{1300, 0, {}, 900, {}, -300}; +}; + +TEST_F(ExecutorTopNTestRunner, TopN) +try +{ + std::shared_ptr request; + std::vector expect_cols; + + { + /// Test single column + size_t col_data_num = col0.size(); + for (size_t i = 1; i <= 1; ++i) + { + bool is_desc; + is_desc = static_cast(i); /// Set descent or ascent + if (is_desc) + sort(col0.begin(), col0.end(), 
std::greater()); /// Sort col0 for the following comparison + else + sort(col0.begin(), col0.end()); + + for (size_t limit_num = 0; limit_num <= col_data_num + 5; ++limit_num) + { + request = buildDAGRequest(table_single_name, single_col_name, is_desc, limit_num); + + expect_cols.clear(); + if (limit_num == 0 || limit_num > col_data_num) + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.end()))}); + else + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.begin() + limit_num))}); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 4), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 8), expect_cols[0]); + } + } + } + + { + /// Test multi-columns + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{36, 34, 32, 27, {}, {}}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "male", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{900, -300, {}, 0, {}, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{32, {}, 34, 27, 36, {}}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{{}, {}, -300, 0, 900, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{34, {}, 32, 36, {}, 27}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "china", "korea", "usa"}), + toNullableVec(col_name[3], ColumnWithInt32{-300, {}, {}, 900, 1300, 0})}}; + + std::vector order_by_items{ + /// select * from clerk order by age DESC, gender DESC; + {MockOrderByItem(col_name[0], true), MockOrderByItem(col_name[1], true)}, + /// select * from clerk order by gender DESC, salary ASC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[3], false)}, + /// select * from clerk order by gender DESC, country ASC, salary DESC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[2], false), MockOrderByItem(col_name[3], true)}}; + + size_t test_num = expect_cols.size(); + + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(table_name, order_by_items[i], 100); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[i]); + } + } +} +CATCH + +TEST_F(ExecutorTopNTestRunner, TopNFunction) +try +{ + std::shared_ptr request; + std::vector expect_cols; + MockColumnNameVec output_projection{col_name[0], col_name[1], col_name[2], col_name[3]}; + MockAstVec func_projection; // Do function operation for topn + MockOrderByItemVec order_by_items; + ASTPtr col0_ast = col(col_name[0]); + ASTPtr col1_ast = col(col_name[1]); + ASTPtr col2_ast = col(col_name[2]); + ASTPtr col3_ast = col(col_name[3]); + ASTPtr func_ast; + + { + /// "and" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, {}, 32, 27, 36, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "china", "usa", "usa", "china", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 0, 900, -300})}}; + + { + /// select * from 
clerk order by age and salary ASC limit 100; + order_by_items = {MockOrderByItem("and(age, salary)", false)}; + func_ast = And(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + } + } + + { + /// "equal" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{27, 36, 34, 32, {}, {}}), + toNullableVec(col_name[1], ColumnWithString{"female", "female", "male", "male", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "korea", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{0, 900, -300, {}, 1300, {}})}}; + + { + /// select age, salary from clerk order by age = salary DESC limit 100; + order_by_items = {MockOrderByItem("equals(age, salary)", true)}; + func_ast = eq(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + } + } + + { + /// "greater" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, 32, {}, 36, 27, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "usa", "china", "china", "usa", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 900, 0, -300})}}; + + { + /// select age, gender, country, salary from clerk order by age > salary ASC limit 100; + order_by_items = {MockOrderByItem("greater(age, salary)", false)}; + func_ast = gt(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + } + } + + /// TODO more functions... +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Functions/CollationOperatorOptimized.h b/dbms/src/Functions/CollationOperatorOptimized.h new file mode 100644 index 00000000000..395ecc5b9eb --- /dev/null +++ b/dbms/src/Functions/CollationOperatorOptimized.h @@ -0,0 +1,210 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +template +ALWAYS_INLINE inline int signum(T val) +{ + return (0 < val) - (val < 0); +} + +// Check equality is much faster than other comparison. +// - check size first +// - return 0 if equal else 1 +__attribute__((flatten, always_inline, pure)) inline uint8_t RawStrEqualCompare(const std::string_view & lhs, const std::string_view & rhs) +{ + return StringRef(lhs) == StringRef(rhs) ? 
0 : 1; +} + +// Compare str view by memcmp +__attribute__((flatten, always_inline, pure)) inline int RawStrCompare(const std::string_view & v1, const std::string_view & v2) +{ + return signum(v1.compare(v2)); +} + +constexpr char SPACE = ' '; + +// Remove tail space +__attribute__((flatten, always_inline, pure)) inline std::string_view RightTrim(const std::string_view & v) +{ + if (likely(v.empty() || v.back() != SPACE)) + return v; + size_t end = v.find_last_not_of(SPACE); + return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1); +} + +__attribute__((flatten, always_inline, pure)) inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb) +{ + return RawStrCompare(RightTrim(va), RightTrim(vb)); +} + +// If true, only need to check equal or not. +template +struct IsEqualRelated +{ + static constexpr const bool value = false; +}; + +// For `EqualsOp` and `NotEqualsOp`, value is true. +template +struct IsEqualRelated> +{ + static constexpr const bool value = true; +}; +template +struct IsEqualRelated> +{ + static constexpr const bool value = true; +}; + +// Loop columns and invoke callback for each pair. +template +__attribute__((flatten, always_inline)) inline void LoopTwoColumns( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets, + size_t size, + F && func) +{ + for (size_t i = 0; i < size; ++i) + { + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + size_t b_size = StringUtil::sizeAt(b_offsets, i) - 1; + const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); + const auto * b_ptr = reinterpret_cast(&b_data[StringUtil::offsetAt(b_offsets, i)]); + + func({a_ptr, a_size}, {b_ptr, b_size}, i); + } +} + +// Loop one column and invoke callback for each pair. +template +__attribute__((flatten, always_inline)) inline void LoopOneColumn( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + size_t size, + F && func) +{ + for (size_t i = 0; i < size; ++i) + { + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); + + func({a_ptr, a_size}, i); + } +} + +// Handle str-column compare str-column. +// - Optimize UTF8_BIN and UTF8MB4_BIN +// - Check if columns do NOT contain tail space +// - If Op is `EqualsOp` or `NotEqualsOp`, optimize comparison by faster way +template +ALWAYS_INLINE inline bool StringVectorStringVector( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets, + const TiDB::TiDBCollatorPtr & collator, + Result & c) +{ + bool use_optimized_path = false; + + switch (collator->getCollatorId()) + { + case TiDB::ITiDBCollator::UTF8MB4_BIN: + case TiDB::ITiDBCollator::UTF8_BIN: + { + size_t size = a_offsets.size(); + + LoopTwoColumns(a_data, a_offsets, b_data, b_offsets, size, [&c](const std::string_view & va, const std::string_view & vb, size_t i) { + if constexpr (IsEqualRelated::value) + { + c[i] = Op::apply(RawStrEqualCompare(RightTrim(va), RightTrim(vb)), 0); + } + else + { + c[i] = Op::apply(RtrimStrCompare(va, vb), 0); + } + }); + + use_optimized_path = true; + + break; + } + default: + break; + } + return use_optimized_path; +} + +// Handle str-column compare const-str. 
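The `UTF8_BIN`/`UTF8MB4_BIN` fast path implements PAD SPACE semantics: trailing spaces are insignificant, after which comparison is plain bytewise; equality additionally short-circuits on length, which is why `EqualsOp`/`NotEqualsOp` get the separate `RawStrEqualCompare` route. Distilled to standard C++ (a local `rightTrim` stand-in for the header's `RightTrim`):

```cpp
#include <cassert>
#include <string_view>

// Trailing spaces are insignificant under the *_BIN collations;
// everything else is ordinary bytewise comparison.
std::string_view rightTrim(std::string_view v)
{
    size_t end = v.find_last_not_of(' ');
    return end == std::string_view::npos ? std::string_view{} : v.substr(0, end + 1);
}

int main()
{
    assert(rightTrim("abc  ") == rightTrim("abc")); // 'abc  ' = 'abc'
    assert(rightTrim("ab ") < rightTrim("abc"));    // then plain byte order
}
```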
+// - Optimize UTF8_BIN and UTF8MB4_BIN +// - Right trim const-str first +// - Check if column does NOT contain tail space +// - If Op is `EqualsOp` or `NotEqualsOp`, optimize comparison by faster way +template +ALWAYS_INLINE inline bool StringVectorConstant( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const std::string_view & b, + const TiDB::TiDBCollatorPtr & collator, + Result & c) +{ + bool use_optimized_path = false; + + switch (collator->getCollatorId()) + { + case TiDB::ITiDBCollator::UTF8MB4_BIN: + case TiDB::ITiDBCollator::UTF8_BIN: + { + size_t size = a_offsets.size(); + + std::string_view tar_str_view = RightTrim(b); // right trim const-str first + + LoopOneColumn(a_data, a_offsets, size, [&c, &tar_str_view](const std::string_view & view, size_t i) { + if constexpr (IsEqualRelated::value) + { + c[i] = Op::apply(RawStrEqualCompare(RightTrim(view), tar_str_view), 0); + } + else + { + c[i] = Op::apply(RawStrCompare(RightTrim(view), tar_str_view), 0); + } + }); + + use_optimized_path = true; + break; + } + default: + break; + } + return use_optimized_path; +} + +} // namespace DB diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 1c63a286452..8f7502fba85 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -301,6 +302,12 @@ struct StringComparisonWithCollatorImpl const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & c) { + bool optimized_path = StringVectorStringVector(a_data, a_offsets, b_data, b_offsets, collator, c); + if (optimized_path) + { + return; + } + size_t size = a_offsets.size(); for (size_t i = 0; i < size; ++i) @@ -317,10 +324,17 @@ struct StringComparisonWithCollatorImpl static void NO_INLINE stringVectorConstant( const ColumnString::Chars_t & a_data, const ColumnString::Offsets & a_offsets, - const std::string & b, + const std::string_view & b, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & c) { + bool optimized_path = StringVectorConstant(a_data, a_offsets, b, collator, c); + + if (optimized_path) + { + return; + } + size_t size = a_offsets.size(); ColumnString::Offset b_size = b.size(); const char * b_data = reinterpret_cast(b.data()); @@ -332,7 +346,7 @@ struct StringComparisonWithCollatorImpl } static void constantStringVector( - const std::string & a, + const std::string_view & a, const ColumnString::Chars_t & b_data, const ColumnString::Offsets & b_offsets, const TiDB::TiDBCollatorPtr & collator, @@ -342,8 +356,8 @@ struct StringComparisonWithCollatorImpl } static void constantConstant( - const std::string & a, - const std::string & b, + const std::string_view & a, + const std::string_view & b, const TiDB::TiDBCollatorPtr & collator, ResultType & c) { @@ -706,6 +720,25 @@ class FunctionComparison : public IFunction } } + static inline std::string_view genConstStrRef(const ColumnConst * c0_const) + { + std::string_view c0_const_str_ref{}; + if (c0_const) + { + if (const auto * c0_const_string = checkAndGetColumn(&c0_const->getDataColumn()); c0_const_string) + { + c0_const_str_ref = std::string_view(c0_const_string->getDataAt(0)); + } + else if (const auto * c0_const_fixed_string = checkAndGetColumn(&c0_const->getDataColumn()); c0_const_fixed_string) + { + c0_const_str_ref = std::string_view(c0_const_fixed_string->getDataAt(0)); + } + else + throw Exception("Logical error: ColumnConst contains not String nor FixedString 
column", ErrorCodes::ILLEGAL_COLUMN); + } + return c0_const_str_ref; + } + template bool executeStringWithCollator( Block & block, @@ -720,10 +753,13 @@ class FunctionComparison : public IFunction using ResultType = typename ResultColumnType::value_type; using StringImpl = StringComparisonWithCollatorImpl, ResultType>; + std::string_view c0_const_str_ref = genConstStrRef(c0_const); + std::string_view c1_const_str_ref = genConstStrRef(c1_const); + if (c0_const && c1_const) { ResultType res = 0; - StringImpl::constantConstant(c0_const->getValue(), c1_const->getValue(), collator, res); + StringImpl::constantConstant(c0_const_str_ref, c1_const_str_ref, collator, res); block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(c0_const->size(), toField(res)); return true; } @@ -745,12 +781,12 @@ class FunctionComparison : public IFunction StringImpl::stringVectorConstant( c0_string->getChars(), c0_string->getOffsets(), - c1_const->getValue(), + c1_const_str_ref, collator, c_res->getData()); else if (c0_const && c1_string) StringImpl::constantStringVector( - c0_const->getValue(), + c0_const_str_ref, c1_string->getChars(), c1_string->getOffsets(), collator, @@ -770,8 +806,8 @@ class FunctionComparison : public IFunction template bool executeString(Block & block, size_t result, const IColumn * c0, const IColumn * c1) const { - const ColumnString * c0_string = checkAndGetColumn(c0); - const ColumnString * c1_string = checkAndGetColumn(c1); + const auto * c0_string = checkAndGetColumn(c0); + const auto * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const = checkAndGetColumnConstStringOrFixedString(c0); const ColumnConst * c1_const = checkAndGetColumnConstStringOrFixedString(c1); diff --git a/dbms/src/Functions/FunctionsConversion.cpp b/dbms/src/Functions/FunctionsConversion.cpp index 118574ed33d..0446f76bd51 100644 --- a/dbms/src/Functions/FunctionsConversion.cpp +++ b/dbms/src/Functions/FunctionsConversion.cpp @@ -240,6 +240,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index ddf64a70ca1..e8333ceeeea 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1751,6 +1751,120 @@ class FunctionDateFormat : public IFunction } }; +class FunctionGetFormat : public IFunction +{ +private: + static String get_format(const StringRef & time_type, const StringRef & location) + { + if (time_type == "DATE") + { + if (location == "USA") + return "%m.%d.%Y"; + else if (location == "JIS") + return "%Y-%m-%d"; + else if (location == "ISO") + return "%Y-%m-%d"; + else if (location == "EUR") + return "%d.%m.%Y"; + else if (location == "INTERNAL") + return "%Y%m%d"; + } + else if (time_type == "DATETIME" || time_type == "TIMESTAMP") + { + if (location == "USA") + return "%Y-%m-%d %H.%i.%s"; + else if (location == "JIS") + return "%Y-%m-%d %H:%i:%s"; + else if (location == "ISO") + return "%Y-%m-%d %H:%i:%s"; + else if (location == "EUR") + return "%Y-%m-%d %H.%i.%s"; + else if (location == "INTERNAL") + return "%Y%m%d%H%i%s"; + } + else if (time_type == "TIME") + { + if (location == "USA") + return "%h:%i:%s %p"; + else if (location == "JIS") + return "%H:%i:%s"; + else if (location == "ISO") + return "%H:%i:%s"; + else if (location == "EUR") + 
return "%H.%i.%s"; + else if (location == "INTERNAL") + return "%H%i%s"; + } + return ""; + } + +public: + static constexpr auto name = "getFormat"; + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (!arguments[0].type->isString()) + throw Exception("First argument for function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN); + if (!arguments[1].type->isString()) + throw Exception("Second argument for function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + /** + * @brief The first argument is designed as a MySQL reserved word. You would encounter a syntax error when wrap it around with quote in SQL. + * For example, select GET_FORMAT("DATE", "USA") will fail. Removing the quote can solve the problem. + * Thus the first argument should always be a ColumnConst. See details in the link below: + * https://dev.mysql.com/doc/refman/5.7/en/date-and-time-functions.html#function_get-format + * + * @return ColumnNumbers + */ + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) const override + { + const auto * location_col = checkAndGetColumn(block.getByPosition(arguments[1]).column.get()); + assert(location_col); + size_t size = location_col->size(); + const auto & time_type_col = block.getByPosition(arguments[0]).column; + auto col_to = ColumnString::create(); + + if (time_type_col->isColumnConst()) + { + const auto & time_type_col_const = checkAndGetColumnConst(time_type_col.get()); + const auto & time_type = time_type_col_const->getValue(); + + ColumnString::Chars_t & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + auto max_length = 18; + data_to.resize(size * max_length); + offsets_to.resize(size); + WriteBufferFromVector write_buffer(data_to); + for (size_t i = 0; i < size; ++i) + { + const auto & location = location_col->getDataAt(i); + const auto & result = get_format(StringRef(time_type), location); + write_buffer.write(result.c_str(), result.size()); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + data_to.resize(write_buffer.count()); + block.getByPosition(result).column = std::move(col_to); + } + else + { + throw Exception("First argument for function " + getName() + " must be String constant", ErrorCodes::ILLEGAL_COLUMN); + } + } +}; + struct NameStrToDateDate { static constexpr auto name = "strToDateDate"; diff --git a/dbms/src/Functions/FunctionsDuration.cpp b/dbms/src/Functions/FunctionsDuration.cpp index ea7b86ac670..9ccafd2794d 100644 --- a/dbms/src/Functions/FunctionsDuration.cpp +++ b/dbms/src/Functions/FunctionsDuration.cpp @@ -97,6 +97,57 @@ void FunctionDurationSplit::executeImpl(Block & block, const ColumnNumbers ErrorCodes::ILLEGAL_COLUMN); }; +template +DataTypePtr FunctionMyDurationToSec::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +{ + if (!arguments[0].type->isMyTime()) + { + throw Exception( + fmt::format("Illegal type {} of the first argument of function {}", arguments[0].type->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + return std::make_shared(); 
+} + +template +void FunctionMyDurationToSec::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const +{ + const auto * from_type = checkAndGetDataType(block.getByPosition(arguments[0]).type.get()); + if (from_type == nullptr) + { + throw Exception( + fmt::format( + "Illegal column {} of the first argument of function {}", + block.getByPosition(arguments[0]).column->getName(), + name), + ErrorCodes::ILLEGAL_COLUMN); + } + + using FromFieldType = typename DataTypeMyDuration::FieldType; + const auto * col_from = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get()); + if (col_from != nullptr) + { + const typename ColumnVector::Container & vec_from = col_from->getData(); + const size_t size = vec_from.size(); + auto col_to = ColumnVector::create(size); + typename ColumnVector::Container & vec_to = col_to->getData(); + + for (size_t i = 0; i < size; ++i) + { + MyDuration val(vec_from[i], from_type->getFsp()); + vec_to[i] = Impl::apply(val); + } + block.getByPosition(result).column = std::move(col_to); + } + else + throw Exception( + fmt::format( + "Illegal column {} of the first argument of function {}", + block.getByPosition(arguments[0]).column->getName(), + name), + ErrorCodes::ILLEGAL_COLUMN); +} + struct DurationSplitHourImpl { static constexpr auto name = "hour"; @@ -133,11 +184,27 @@ struct DurationSplitMicroSecondImpl } }; +struct TiDBTimeToSecTransformerImpl +{ + static constexpr auto name = "tidbTimeToSec"; + static Int64 apply(const MyDuration & val) + { + Int64 sign = 1; + if (val.isNeg()) + { + sign = -1; + } + return sign * (val.hours() * 3600 + val.minutes() * 60 + val.seconds()); + } +}; + using FunctionDurationHour = FunctionDurationSplit; using FunctionDurationMinute = FunctionDurationSplit; using FunctionDurationSecond = FunctionDurationSplit; using FunctionDurationMicroSecond = FunctionDurationSplit; +using FunctionToTiDBTimeToSec = FunctionMyDurationToSec; + void registerFunctionsDuration(FunctionFactory & factory) { factory.registerFunction(); @@ -146,5 +213,7 @@ void registerFunctionsDuration(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + + factory.registerFunction(); } } // namespace DB diff --git a/dbms/src/Functions/FunctionsDuration.h b/dbms/src/Functions/FunctionsDuration.h index 4247cde03ff..5bc54d425f4 100644 --- a/dbms/src/Functions/FunctionsDuration.h +++ b/dbms/src/Functions/FunctionsDuration.h @@ -69,4 +69,23 @@ class FunctionDurationSplit : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override; }; +template +class FunctionMyDurationToSec : public IFunction +{ +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override; +}; + } // namespace DB \ No newline at end of file diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index b9f20e45134..76022b983ad 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -992,7 +992,7 @@ class FunctionStringOrArrayToT : public 
IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const ColumnPtr column = block.getByPosition(arguments[0]).column; - if (const ColumnString * col = checkAndGetColumn(column.get())) + if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnVector::create(); @@ -1002,7 +1002,7 @@ class FunctionStringOrArrayToT : public IFunction block.getByPosition(result).column = std::move(col_res); } - else if (const ColumnFixedString * col = checkAndGetColumn(column.get())) + else if (const auto * col = checkAndGetColumn(column.get())) { if (Impl::is_fixed_to_constant) { @@ -1022,7 +1022,7 @@ class FunctionStringOrArrayToT : public IFunction block.getByPosition(result).column = std::move(col_res); } } - else if (const ColumnArray * col = checkAndGetColumn(column.get())) + else if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnVector::create(); @@ -1081,13 +1081,13 @@ class FunctionReverse : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const ColumnPtr column = block.getByPosition(arguments[0]).column; - if (const ColumnString * col = checkAndGetColumn(column.get())) + if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnString::create(); ReverseImpl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); block.getByPosition(result).column = std::move(col_res); } - else if (const ColumnFixedString * col = checkAndGetColumn(column.get())) + else if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnFixedString::create(col->getN()); ReverseImpl::vectorFixed(col->getChars(), col->getN(), col_res->getChars()); @@ -1131,7 +1131,7 @@ class FunctionJsonLength : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const ColumnPtr column = block.getByPosition(arguments[0]).column; - if (const ColumnString * col = checkAndGetColumn(column.get())) + if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnUInt64::create(); typename ColumnUInt64::Container & vec_col_res = col_res->getData(); @@ -1232,8 +1232,8 @@ class ConcatImpl : public IFunction const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); - const ColumnString * c1_string = checkAndGetColumn(c1); + const auto * c0_string = checkAndGetColumn(c0); + const auto * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); const ColumnConst * c1_const_string = checkAndGetColumnConst(c1); @@ -1552,7 +1552,7 @@ class FunctionSubstring : public IFunction if (number_of_arguments == 3) column_length = block.getByPosition(arguments[2]).column; - const ColumnConst * column_start_const = checkAndGetColumn(column_start.get()); + const auto * column_start_const = checkAndGetColumn(column_start.get()); const ColumnConst * column_length_const = nullptr; if (number_of_arguments == 3) @@ -1572,9 +1572,9 @@ class FunctionSubstring : public IFunction throw Exception("Third argument provided for function substring could not be negative.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); } - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const auto * col = checkAndGetColumn(column_string.get())) executeForSource(column_start, column_length, 
column_start_const, column_length_const, start_value, length_value, block, result, StringSource(*col)); - else if (const ColumnFixedString * col = checkAndGetColumn(column_string.get())) + else if (const auto * col = checkAndGetColumn(column_string.get())) executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, FixedStringSource(*col)); else if (const ColumnConst * col = checkAndGetColumnConst(column_string.get())) executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, ConstSource(*col)); @@ -1676,7 +1676,7 @@ class FunctionSubstringUTF8 : public IFunction return true; } - const ColumnString * col = checkAndGetColumn(column_string.get()); + const auto * col = checkAndGetColumn(column_string.get()); assert(col); auto col_res = ColumnString::create(); getVectorConstConstFunc(implicit_length, is_positive)(col->getChars(), col->getOffsets(), start_abs, length, col_res->getChars(), col_res->getOffsets()); @@ -1732,7 +1732,7 @@ class FunctionSubstringUTF8 : public IFunction // convert to vector if string is const. ColumnPtr full_column_string = column_string->isColumnConst() ? column_string->convertToFullColumnIfConst() : column_string; - const ColumnString * col = checkAndGetColumn(full_column_string.get()); + const auto * col = checkAndGetColumn(full_column_string.get()); assert(col); auto col_res = ColumnString::create(); if (implicit_length) @@ -1869,7 +1869,7 @@ class FunctionRightUTF8 : public IFunction using LengthFieldType = typename LengthType::FieldType; auto col_res = ColumnString::create(); - if (const ColumnString * col_string = checkAndGetColumn(column_string.get())) + if (const auto * col_string = checkAndGetColumn(column_string.get())) { if (column_length->isColumnConst()) { @@ -1897,7 +1897,7 @@ class FunctionRightUTF8 : public IFunction else if (const ColumnConst * col_const_string = checkAndGetColumnConst(column_string.get())) { // const vector - const ColumnString * col_string_from_const = checkAndGetColumn(col_const_string->getDataColumnPtr().get()); + const auto * col_string_from_const = checkAndGetColumn(col_const_string->getDataColumnPtr().get()); assert(col_string_from_const); // When useDefaultImplementationForConstants is true, string and length are not both constants assert(!column_length->isColumnConst()); @@ -1993,7 +1993,7 @@ class FunctionAppendTrailingCharIfAbsent : public IFunction if (!checkColumnConst(column_char.get())) throw Exception(fmt::format("Second argument of function {} must be a constant string", getName()), ErrorCodes::ILLEGAL_COLUMN); - String trailing_char_str = static_cast(*column_char).getValue(); + auto trailing_char_str = static_cast(*column_char).getValue(); if (trailing_char_str.size() != 1) throw Exception(fmt::format("Second argument of function {} must be a one-character string", getName()), ErrorCodes::BAD_ARGUMENTS); @@ -2101,7 +2101,7 @@ class TrimImpl : public IFunction void executeTrim(Block & block, const ColumnNumbers & arguments, const size_t result) const { const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); + const auto * c0_string = checkAndGetColumn(c0); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); auto c_res = ColumnString::create(); @@ -2121,8 +2121,8 @@ class TrimImpl : public IFunction const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = 
block.getByPosition(arguments[1]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); - const ColumnString * c1_string = checkAndGetColumn(c1); + const auto * c0_string = checkAndGetColumn(c0); + const auto * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); const ColumnConst * c1_const_string = checkAndGetColumnConst(c1); @@ -2202,7 +2202,7 @@ class TrimUTF8Impl : public IFunction void executeTrim(Block & block, const ColumnNumbers & arguments, const size_t result) const { const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); + const auto * c0_string = checkAndGetColumn(c0); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); auto c_res = ColumnString::create(); @@ -2225,7 +2225,7 @@ class TrimUTF8Impl : public IFunction const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); + const auto * c0_string = checkAndGetColumn(c0); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); const ColumnConst * c1_const_string = checkAndGetColumnConst(c1); const auto * column_trim_string = checkAndGetColumn(c1_const_string->getDataColumnPtr().get()); @@ -2716,7 +2716,7 @@ class FunctionTiDBTrim : public IFunction ColumnPtr & column_data = block.getByPosition(arguments[0]).column; auto res_col = ColumnString::create(); - const ColumnString * data_col = checkAndGetColumn(column_data.get()); + const auto * data_col = checkAndGetColumn(column_data.get()); static constexpr std::string_view default_rem = " "; static const auto * remstr_ptr = reinterpret_cast(default_rem.data()); @@ -2738,25 +2738,25 @@ class FunctionTiDBTrim : public IFunction if (data_const && !remstr_const) { const ColumnConst * data_col = checkAndGetColumnConst(column_data.get()); - const ColumnString * remstr_col = checkAndGetColumn(column_remstr.get()); + const auto * remstr_col = checkAndGetColumn(column_remstr.get()); - const std::string data = data_col->getValue(); + const auto data = data_col->getValue(); const auto * data_ptr = reinterpret_cast(data.c_str()); constVector(is_ltrim, is_rtrim, data_ptr, data.size() + 1, remstr_col->getChars(), remstr_col->getOffsets(), res_col->getChars(), res_col->getOffsets()); } else if (remstr_const && !data_const) { const ColumnConst * remstr_col = checkAndGetColumnConst(column_remstr.get()); - const ColumnString * data_col = checkAndGetColumn(column_data.get()); + const auto * data_col = checkAndGetColumn(column_data.get()); - const std::string remstr = remstr_col->getValue(); + const auto remstr = remstr_col->getValue(); const auto * remstr_ptr = reinterpret_cast(remstr.c_str()); vectorConst(is_ltrim, is_rtrim, data_col->getChars(), data_col->getOffsets(), remstr_ptr, remstr.size() + 1, res_col->getChars(), res_col->getOffsets()); } else { - const ColumnString * data_col = checkAndGetColumn(column_data.get()); - const ColumnString * remstr_col = checkAndGetColumn(column_remstr.get()); + const auto * data_col = checkAndGetColumn(column_data.get()); + const auto * remstr_col = checkAndGetColumn(column_remstr.get()); vectorVector(is_ltrim, is_rtrim, data_col->getChars(), data_col->getOffsets(), remstr_col->getChars(), remstr_col->getOffsets(), res_col->getChars(), res_col->getOffsets()); } @@ -2769,7 +2769,7 @@ class FunctionTiDBTrim : public IFunction ColumnPtr & column_direction = 
block.getByPosition(arguments[2]).column; if (!column_direction->isColumnConst()) throw Exception(fmt::format("3nd argument of function {} must be constant.", getName())); - const ColumnConst * direction_col = checkAndGetColumn(column_direction.get()); + const auto * direction_col = checkAndGetColumn(column_direction.get()); static constexpr Int64 trim_both_default = 0; // trims from both direction by default static constexpr Int64 trim_both = 1; // trims from both direction with explicit notation @@ -2989,7 +2989,7 @@ class TidbPadImpl { continue; } - int32_t len = static_cast(column_length->getInt(i)); + auto len = static_cast(column_length->getInt(i)); if (len <= 0) { len = 0; @@ -3051,7 +3051,7 @@ class TidbPadImpl } else { - const ColumnString * column_string = checkAndGetColumn(column_string_ptr.get()); + const auto * column_string = checkAndGetColumn(column_string_ptr.get()); const ColumnString::Offsets & string_offsets = column_string->getOffsets(); const ColumnString::Chars_t & string_data = column_string->getChars(); @@ -3233,7 +3233,7 @@ class TidbPadImpl return true; } - ColumnString::Offset tmp_target_len = static_cast(target_len); + auto tmp_target_len = static_cast(target_len); ColumnString::Offset per_pad_offset = 0; ColumnString::Offset pad_bytes = 0; ColumnString::Offset left = 0; @@ -3300,7 +3300,7 @@ class TidbPadImpl return true; } - ColumnString::Offset tmp_target_len = static_cast(target_len); + auto tmp_target_len = static_cast(target_len); if (data_len < tmp_target_len) { ColumnString::Offset left = tmp_target_len - data_len; @@ -3421,7 +3421,7 @@ class PadImpl : public IFunction ColumnPtr column_length = block.getByPosition(arguments[1]).column; ColumnPtr column_padding = block.getByPosition(arguments[2]).column; - const ColumnConst * column_length_const = checkAndGetColumn(column_length.get()); + const auto * column_length_const = checkAndGetColumn(column_length.get()); const ColumnConst * column_padding_const = checkAndGetColumnConst(column_padding.get()); Int64 length_value = 0; @@ -3441,7 +3441,7 @@ class PadImpl : public IFunction auto c_res = ColumnString::create(); - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const auto * col = checkAndGetColumn(column_string.get())) pad, StringSink>( StringSource(*col), ConstSource(*column_padding_const), @@ -3548,7 +3548,7 @@ class PadUTF8Impl : public IFunction ColumnPtr column_length = block.getByPosition(arguments[1]).column; ColumnPtr column_padding = block.getByPosition(arguments[2]).column; - const ColumnConst * column_length_const = checkAndGetColumn(column_length.get()); + const auto * column_length_const = checkAndGetColumn(column_length.get()); const ColumnConst * column_padding_const = checkAndGetColumnConst(column_padding.get()); Int64 length_value = 0; @@ -3568,7 +3568,7 @@ class PadUTF8Impl : public IFunction auto c_res = ColumnString::create(); const auto * column_padding_string = checkAndGetColumn(column_padding_const->getDataColumnPtr().get()); - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const auto * col = checkAndGetColumn(column_string.get())) vector(col->getChars(), col->getOffsets(), length_value, column_padding_string->getChars(), column_padding_string->getOffsets(), c_res->getChars(), c_res->getOffsets()); else if (const ColumnConst * col = checkAndGetColumnConst(column_string.get())) { @@ -4114,8 +4114,8 @@ class FunctionASCII : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const 
override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const ColumnConst * c0_const = checkAndGetColumn(c0_col); - const ColumnString * c0_string = checkAndGetColumn(c0_col); + const auto * c0_const = checkAndGetColumn(c0_col); + const auto * c0_string = checkAndGetColumn(c0_col); Field res_field; int val_num = c0_col->size(); @@ -4165,8 +4165,8 @@ class FunctionLength : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const ColumnConst * c0_const = checkAndGetColumn(c0_col); - const ColumnString * c0_string = checkAndGetColumn(c0_col); + const auto * c0_const = checkAndGetColumn(c0_col); + const auto * c0_string = checkAndGetColumn(c0_col); Field res_field; int val_num = c0_col->size(); @@ -4215,13 +4215,13 @@ class FunctionPosition : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const ColumnConst * c0_const = checkAndGetColumn(c0_col); - const ColumnString * c0_string = checkAndGetColumn(c0_col); + const auto * c0_const = checkAndGetColumn(c0_col); + const auto * c0_string = checkAndGetColumn(c0_col); Field c0_field; const IColumn * c1_col = block.getByPosition(arguments[1]).column.get(); - const ColumnConst * c1_const = checkAndGetColumn(c1_col); - const ColumnString * c1_string = checkAndGetColumn(c1_col); + const auto * c1_const = checkAndGetColumn(c1_col); + const auto * c1_string = checkAndGetColumn(c1_col); Field c1_field; if ((c0_const == nullptr && c0_string == nullptr) || (c1_const == nullptr && c1_string == nullptr)) @@ -4331,7 +4331,7 @@ class FunctionSubStringIndex : public IFunction column_str = column_str->isColumnConst() ? column_str->convertToFullColumnIfConst() : column_str; if (delim_const && count_const) { - const ColumnString * str_col = checkAndGetColumn(column_str.get()); + const auto * str_col = checkAndGetColumn(column_str.get()); const ColumnConst * delim_col = checkAndGetColumnConst(column_delim.get()); const ColumnConst * count_col = checkAndGetColumnConst>(column_count.get()); if (str_col == nullptr || delim_col == nullptr || count_col == nullptr) @@ -4339,7 +4339,7 @@ class FunctionSubStringIndex : public IFunction return false; } auto col_res = ColumnString::create(); - IntType count = count_col->getValue(); + auto count = count_col->getValue(); vectorConstConst( str_col->getChars(), str_col->getOffsets(), @@ -4353,9 +4353,9 @@ class FunctionSubStringIndex : public IFunction { column_delim = column_delim->isColumnConst() ? column_delim->convertToFullColumnIfConst() : column_delim; column_count = column_count->isColumnConst() ? 
column_count->convertToFullColumnIfConst() : column_count; - const ColumnString * str_col = checkAndGetColumn(column_str.get()); - const ColumnString * delim_col = checkAndGetColumn(column_delim.get()); - const ColumnVector * count_col = checkAndGetColumn>(column_count.get()); + const auto * str_col = checkAndGetColumn(column_str.get()); + const auto * delim_col = checkAndGetColumn(column_delim.get()); + const auto * count_col = checkAndGetColumn>(column_count.get()); if (str_col == nullptr || delim_col == nullptr || count_col == nullptr) { return false; @@ -4573,7 +4573,9 @@ class FormatImpl : public IFunction using NumberFieldType = typename NumberType::FieldType; using NumberColVec = std::conditional_t, ColumnDecimal, ColumnVector>; const auto * number_raw = block.getByPosition(arguments[0]).column.get(); + TiDBDecimalRoundInfo info{number_type, number_type}; + info.output_prec = info.output_prec < 65 ? info.output_prec + 1 : 65; return getPrecisionType(precision_base_type, [&](const auto & precision_type, bool) { using PrecisionType = std::decay_t; @@ -4723,10 +4725,11 @@ class FormatImpl : public IFunction static void format( T number, size_t max_num_decimals, - const TiDBDecimalRoundInfo & info, + TiDBDecimalRoundInfo & info, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { + info.output_scale = std::min(max_num_decimals, static_cast(info.input_scale)); auto round_number = round(number, max_num_decimals, info); std::string round_number_str = number2Str(round_number, info); std::string buffer = Format::apply(round_number_str, max_num_decimals); @@ -4870,7 +4873,7 @@ class FunctionFormatWithLocale : public IFunction } else { - const String value = locale_const->getValue(); + const auto value = locale_const->getValue(); if (!boost::iequals(value, supported_locale)) { const auto & msg = genWarningMsg(value); diff --git a/dbms/src/Functions/bitShiftRight.cpp b/dbms/src/Functions/bitShiftRight.cpp index 961f7459f68..90b365771de 100644 --- a/dbms/src/Functions/bitShiftRight.cpp +++ b/dbms/src/Functions/bitShiftRight.cpp @@ -13,6 +13,9 @@ // limitations under the License. #include +#include + +#include namespace DB { @@ -29,7 +32,18 @@ struct BitShiftRightImpl template static Result apply(A a, B b) { - return static_cast(a) >> static_cast(b); + // It is an undefined behavior for shift operation in c++ that the right operand is negative or greater than + // or equal to the number of digits of the bits in the (promoted) left operand. + // See https://en.cppreference.com/w/cpp/language/operator_arithmetic for details. + if (static_cast(b) >= std::numeric_limits(a))>::digits) + { + return static_cast(0); + } + // Note that we do not consider the case that the right operand is negative, + // since other types will all be cast to uint64 before shift operation + // according to DAGExpressionAnalyzerHelper::buildBitwiseFunction. + // Therefore, we simply suppress clang-tidy checking here. 
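+            // Illustration, assuming the operands have been cast to UInt64 as described above:
+            // std::numeric_limits<UInt64>::digits is 64, so any shift amount b >= 64 takes this
+            // early-return branch and yields 0 (the TiDB-compatible result) instead of C++ UB.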
+ return static_cast(a) >> static_cast(b); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) } template static Result apply(A, B, UInt8 &) @@ -87,4 +101,4 @@ void registerFunctionBitShiftRight(FunctionFactory & factory) factory.registerFunction(); } -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Functions/tests/gtest_bitshiftright.cpp b/dbms/src/Functions/tests/gtest_bitshiftright.cpp new file mode 100644 index 00000000000..a4af6336099 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_bitshiftright.cpp @@ -0,0 +1,273 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ +class TestFunctionBitShiftRight : public DB::tests::FunctionTest +{ +}; + +#define ASSERT_BITSHIFTRIGHT(t1, t2, result) \ + ASSERT_COLUMN_EQ(result, executeFunction("bitShiftRight", {t1, t2})) + +TEST_F(TestFunctionBitShiftRight, Simple) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn>({8}), + createColumn>({2}), + createColumn>({2})); +} +CATCH + +/// Note: Only IntX and UIntX will be received by BitShiftRight, others will be casted by TiDB planner. +/// Note: BitShiftRight will further cast other types to UInt64 before doing shift. +TEST_F(TestFunctionBitShiftRight, TypePromotion) +try +{ + // Type Promotion + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + + // Type Promotion across signed/unsigned + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({0}), createColumn>({18446744073709551615ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({0}), createColumn>({18446744073709551615ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, Nullable) +try +{ + // Non Nullable + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + 
ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + + // Across Nullable and non-Nullable + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, TypeCastWithConst) +try +{ + /// need test these kinds of columns: + /// 1. ColumnVector + /// 2. ColumnVector + /// 3. ColumnConst + /// 4. ColumnConst, value != null + /// 5. 
ColumnConst, value = null + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createColumn({0, 1, 0, 1}), createColumn({0, 0, 1, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({0, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createConstColumn(4, 0), createColumn({0, 0, 1, 1})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createConstColumn>(4, 0), createColumn({0, 0, 1, 1})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); // become const in wrapInNullable + + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn({0, 1, 0, 1}), createColumn>({0, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({0, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn(4, 0), createColumn>({0, 1, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn(4, 0), createColumn>({0, 1, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); + + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createColumn({0, 1, 0, 1}), createColumn({1, 0, 1, 0})); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({1, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createConstColumn(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createConstColumn>(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); + + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createColumn({0, 1, 0, 1}), createColumn({1, 0, 1, 0})); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({1, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createConstColumn(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createConstColumn>(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); + + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createColumn({0, 1, 0, 1}), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createConstColumn(4, 0), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createConstColumn(4, 0), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, Boundary) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn({127, 127, -128, -128}), createColumn({0, 7, 0, 7}), createColumn({127, 0, 18446744073709551488ull, 144115188075855871ull})); + ASSERT_BITSHIFTRIGHT(createColumn({127, 127, -128, 
-128}), createColumn({0, 7, 0, 7}), createColumn({127, 0, 18446744073709551488ull, 144115188075855871ull})); + ASSERT_BITSHIFTRIGHT(createColumn({32767, 32767, -32768, -32768}), createColumn({0, 15, 0, 15}), createColumn({32767, 0, 18446744073709518848ull, 562949953421311ull})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1, -1, -1, INT64_MAX, INT64_MAX, INT64_MIN, INT64_MIN}), + createColumn({0, 63, 0, 63, 0, 63, 0, 63, 0, 63}), + createColumn({0, 0, 1, 0, 18446744073709551615ull, 1, INT64_MAX, 0, 9223372036854775808ull, 1})); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, UINT64) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn({0, UINT64_MAX}), + createColumn({63, 63}), + createColumn({0, 1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, UINT64_MAX, std::nullopt}), + createColumn>({63, 63, 63}), + createColumn>({0, 1, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, UINT64_MAX, std::nullopt}), + createColumn({63, 63, 63}), + createColumn>({0, 1, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, UINT64_MAX}), + createColumn>({63, 63}), + createColumn>({0, 1})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1, -1, -1, INT64_MAX, INT64_MAX, INT64_MIN, INT64_MIN}), + createColumn({0, UINT64_MAX, 0, UINT64_MAX, 0, UINT64_MAX, 0, UINT64_MAX, 0, UINT64_MAX}), + createColumn({0, 0, 1, 0, 18446744073709551615ull, 0, INT64_MAX, 0, 9223372036854775808ull, 0})); + + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, UINT64_MAX, UINT64_MAX}), + createColumn({0, UINT64_MAX, 0, UINT64_MAX}), + createColumn({0, 0, UINT64_MAX, 0})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, 0, UINT64_MAX, UINT64_MAX, 0, std::nullopt}), + createColumn>({0, UINT64_MAX, 0, UINT64_MAX, std::nullopt, 0}), + createColumn>({0, 0, UINT64_MAX, 0, std::nullopt, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, 0, UINT64_MAX, UINT64_MAX, std::nullopt}), + createColumn({0, UINT64_MAX, 0, UINT64_MAX, 0}), + createColumn>({0, 0, UINT64_MAX, 0, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, UINT64_MAX, 0, UINT64_MAX, 0}), + createColumn>({0, 0, UINT64_MAX, UINT64_MAX, std::nullopt}), + createColumn>({0, UINT64_MAX, 0, 0, std::nullopt})); + + /* + std::mt19937 gen(std::random_device{}()); + std::uniform_int_distribution dis( + std::numeric_limits::min(), + std::numeric_limits::max() + ); + size_t count = 100; + std::vector v1(count), v2(count), res(count); + for (size_t i=0; i> v2[i]; + } + */ + // clang-format off + 
ASSERT_BITSHIFTRIGHT(createColumn({4286230172992429668ull,11550684080080434735ull,775195682263841867ull,18390588538388462661ull,15578761645824658314ull,20662948907547635ull,8403266546632871011ull,10316916867086714284ull,14494183568060929367ull,11741337603037632348ull,10803264694948981380ull,2181969932373516503ull,9673801579564730047ull,12998855911221966916ull,13852157931865274857ull,9203926828777338586ull,8903261359104369984ull,3296258311466476456ull,14658801806079697908ull,7542518003247963618ull,7751150277360944372ull,12225694156629117269ull,3173837214287201256ull,10555082060194839563ull,14202570947308501213ull,13841194359225980123ull,9085267378073816945ull,15975493157631073381ull,1890233386459299033ull,2368634323417847398ull,691423931511513606ull,986000479038857169ull,6676906740954304741ull,2841686799872009560ull,6483676442160212821ull,12550114481083571140ull,1973026146580965947ull,15006687639313690830ull,6443617813685195609ull,13648732879238232658ull,173820604016606515ull,2669428687588070677ull,15361476519767969236ull,8957522718906827285ull,10484385204137290737ull,12390466571993898199ull,13655746682011856065ull,4183302523705398003ull,9898692767945122925ull,16701902679050716746ull,15003324714492513897ull,15554724240808081962ull,7754458312088240871ull,16060968032680196798ull,12619581440986221928ull,15462661961676206824ull,2991773628650321635ull,16341599119345297909ull,14943939970889580769ull,17589764776976679210ull,15274914527536421890ull,16268454608136611433ull,14617646699124891378ull,466927094873143934ull,10558583305251737283ull,255559140356160501ull,5962789691899784330ull,8004603198837555992ull,1881892337023478820ull,6549167700870881840ull,17551996157828573642ull,3349744237253314638ull,2876698686583880568ull,16792783373922568330ull,16231348759981899800ull,17731631990557975899ull,1305376485657663531ull,3568754485566225727ull,10076204423028931225ull,1206238310176455071ull,4297062324543635867ull,5116785256928623516ull,4216305034157620433ull,412817651268481791ull,11256299741838589766ull,10786197076871163667ull,8588357635228913652ull,6361409982074778071ull,4750871994764527580ull,12851835128796581697ull,13871712051825681122ull,12445309465661589227ull,1668617678034382020ull,10152918068481134781ull,16242941973571224246ull,12988338226657152812ull,2352083670492692674ull,10735026236980245779ull,14986388012066843516ull,17651064432466444102ull}), + createColumn({0,58,55,24,5,35,34,54,43,45,17,36,51,54,19,55,55,8,37,49,15,11,36,0,5,41,46,54,2,59,11,25,43,29,31,8,59,2,11,19,56,35,57,13,2,35,6,54,17,0,49,5,15,3,60,44,16,6,57,44,58,54,26,23,58,23,26,29,56,40,45,2,21,9,57,40,4,46,17,15,62,21,5,54,22,47,10,24,53,61,43,52,23,10,61,43,26,31,38,2}), + 
createColumn({4286230172992429668ull,40,21,1096164497041ull,486836301432020572ull,601370,489134489,572,1647797,333708,82422368583289ull,31751841,4296,721,26420894492846ull,255,247,12876009029165923ull,106656820,13398,236546334147978ull,5969577224916561ull,46185410,10555082060194839563ull,443830342103390662ull,6294246,129109,886,472558346614824758ull,4,337609341558356ull,29385104150ull,759076,5293054133ull,3019197118ull,49023884691732699ull,3,3751671909828422707ull,3146297760588474ull,26032891996838ull,2,77690599,106,1093447597522806ull,2621096301034322684ull,360610038,213371041906435251ull,232,75521032470284ull,16701902679050716746ull,26651,486085132525252561ull,236647287356208ull,2007621004085024599ull,10,878950,45650842722325ull,255337486239770279ull,103,999862,52,903,217819909738ull,55662047251ull,36,30465023560ull,88852490364ull,14909735319ull,26,5956433,498857,837436059313328659ull,1371716826717ull,32798405027192516ull,112,16126825,81586030353603970ull,50715,76875338920813ull,36811471868177ull,0,2439873341049ull,131759532317425638ull,22,2683710990390ull,76640,8387068003153235ull,379169582252ull,527,5,1577031,2763,198914727930ull,9914959051251108ull,7,1476603,35048777915ull,4998886136ull,54520161,4412766108116611025ull})); + // clang-format on +} +CATCH + +TEST_F(TestFunctionBitShiftRight, UB) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn({127, -128}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({127, -128}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({32767, -32768}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({INT32_MAX, INT32_MIN}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({INT64_MAX, INT64_MIN}), createColumn({64, 64}), createColumn({0, 0})); + + ASSERT_BITSHIFTRIGHT(createColumn({255}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({255}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({65535}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({UINT32_MAX}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({UINT64_MAX}), createColumn({64}), createColumn({0})); + + /* + std::mt19937 gen(std::random_device{}()); + std::uniform_int_distribution dis1( + std::numeric_limits::min(), + std::numeric_limits::max() + ); + std::uniform_int_distribution dis2( + 64, + std::numeric_limits::max() + ); + size_t count = 100; + std::vector v1(count), v2(count), res(count); + for (size_t i=0; 
i({17563387625296433369ull,5842891814427459261ull,15074502074821508463ull,386435802999553003ull,5487893274931198395ull,8125923807366590570ull,13340330062727071249ull,14908193031091561411ull,296805448857369387ull,8684453485792353774ull,13117933444495098288ull,3225762988982100714ull,11290506757949810556ull,14617912756126856962ull,9479575714707174581ull,11720728318194739598ull,14410575429605211363ull,12068356718035872518ull,80682389916710599ull,11003236134534292734ull,4412447398096224810ull,5331184707993902906ull,13827083432789678788ull,958142831027309576ull,16716461997317184701ull,17128750834581527743ull,11590434571174666313ull,10204342520615148287ull,11067791415848657283ull,17583875436196878829ull,186304014359496415ull,9381729025189804702ull,11502205568225715300ull,16472133582690439104ull,3743303387826342067ull,12860029445868505658ull,2244056593742923769ull,3275687468466891223ull,1545828456957460699ull,14187252460708728077ull,7551907967738536187ull,9754400233340010491ull,16293183350230169116ull,6298812696728711031ull,5915538565572009956ull,2284684518775825662ull,1130711226902262476ull,17158957721471765323ull,4220824385439711070ull,16559772875254313109ull,15397179690017513678ull,6300413832999049491ull,13787530251307637715ull,10132349060092695582ull,10446586881482901699ull,15759779838283537085ull,14402587207027333363ull,5546051719872960161ull,6545031029710296628ull,17407295406267098658ull,4259019625544816073ull,791895457880289787ull,8549227257401578066ull,15246278171168501125ull,1674668228908076954ull,849762797502000057ull,13302651500925764574ull,12438174880334092333ull,17701249772557033303ull,10742459186038873636ull,15671491258945407856ull,9352557101631889001ull,8914093883925002585ull,17935292744735591949ull,606989231583658922ull,6528503454270721815ull,14980539549624989095ull,13765196438235456668ull,3058323869228644592ull,14346577759191739044ull,1543206286382906519ull,1025562312317433790ull,17052896445025268012ull,18349597294988935754ull,17174604730104962524ull,11924965352621110201ull,502032511104181724ull,13845633389643139332ull,15436039204445155412ull,17809579006694175565ull,15166364145138562881ull,14062748599121933798ull,1777457178576774356ull,4985224560472716170ull,3881603168175384251ull,11555031280550342082ull,1252677486917153396ull,8744807353133366467ull,2048964426549800495ull,11945831330508218140ull}), + 
createColumn({7570379165150948640ull,2086259313016069849ull,3606689596671293211ull,14039117280692395662ull,13678665403528829741ull,16069000531561010558ull,18229345530821449414ull,433464578739092378ull,6298872104011095934ull,4518228872693063137ull,14988726875963869472ull,9568218424260764817ull,5383191468426384555ull,8698762658876708752ull,9487599666567205013ull,14370091126330876161ull,10702068376663045773ull,8045701071228357739ull,10878469353312437370ull,3183167829827610494ull,5928881618833110378ull,10410530709181481816ull,249988564503361262ull,13482614555530280987ull,5522946068620734806ull,12797173590813112894ull,14133419908717831141ull,10825732602137508628ull,13271177233899692778ull,1157753039017783757ull,3370600557036147696ull,2957689395775524062ull,11963898745206689513ull,4828931188614542720ull,15157289330857160797ull,369467010700905309ull,6278071805692607460ull,17817858137511910604ull,17789013631125929528ull,2861684947245777353ull,2583152408663154190ull,7935135702156687355ull,3033127046167579202ull,14224256960933395097ull,10838403249753694181ull,2154089102842257532ull,7860358918492191001ull,2982010253383852617ull,16385171982396620123ull,12241857497176342828ull,2080931105225959532ull,1046322072991155713ull,6146917059052005252ull,17411786298437646544ull,5497869583209795613ull,11701448129764809247ull,12642962700918363620ull,15936842187305218463ull,7811510447588439153ull,3558405966224377785ull,977960926168429540ull,9505800334935014018ull,12114068456102275321ull,5141880021314950000ull,6719615890604904521ull,1341445859098821585ull,3883912906202435997ull,2107770591867486616ull,2657186337437393032ull,2640917573672927653ull,3746140861437224253ull,15057648507099656234ull,12051189681068107042ull,2259769676757597701ull,2935229535510718769ull,6368233316971463582ull,14384644474340782197ull,2553547617837260603ull,14238122466576902747ull,9555765226032904481ull,15522640015319979866ull,10274396157562093026ull,5996101113505388770ull,16915812546351047056ull,4956089714130804219ull,17126605744801075545ull,12036643325202409080ull,11257234688654558199ull,375338337104024778ull,11152980243617851986ull,12325805905403174063ull,8653948654121626815ull,15348912598299408338ull,6883296938248095081ull,6484642948886870833ull,16936141613107270500ull,17012171815528507292ull,2574129622316042070ull,17178726110735453748ull,16578303277501346489ull}), + createColumn({0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0})); + // clang-format on +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Functions/tests/gtest_duration_pushdown.cpp b/dbms/src/Functions/tests/gtest_duration_pushdown.cpp index 4501a4c9fae..106f3d84642 100644 --- a/dbms/src/Functions/tests/gtest_duration_pushdown.cpp +++ b/dbms/src/Functions/tests/gtest_duration_pushdown.cpp @@ -166,5 +166,85 @@ try ASSERT_COLUMN_EQ(microSecond_out, executeFunction("microSecond", input4)); } CATCH + +TEST_F(DurationPushDown, timeToSecPushDownTest) +try +{ + ColumnWithTypeAndName input( + createColumn>({(838 * 3600 + 59 * 60 + 59) * 1000000000L + 999999000L, + -(838 * 3600 + 59 * 60 + 59) * 1000000000L - 123456000L, + 0, + (1 * 3600 + 2 * 60 + 3) * 1000000000L + 4000L}) + .column, + makeNullable(std::make_shared(6)), + "input"); + auto second_output = createColumn>({3020399, -3020399, 0, 3723}); + ASSERT_COLUMN_EQ(second_output, executeFunction("tidbTimeToSec", input)); + + // Test Overflow + 
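+    // 838:59:59.999999 is the MyDuration maximum: (838 * 3600 + 59 * 60 + 59) = 3020399 seconds,
+    // and 3020399 * 10^9 + 999999000 = 3020399999999000 nanoseconds, matching the bound in the
+    // expected exception message; one more microsecond exceeds it and must throw.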
ColumnWithTypeAndName input2( + createColumn>({(838 * 3600 + 59 * 60 + 59) * 1000000000L + 999999000L + 1000L}).column, + makeNullable(std::make_shared(6)), + "result"); + try + { + auto result = executeFunction("tidbTimeToSec", input2); + FAIL() << "Expected overflow"; + } + catch (DB::Exception & e) + { + ASSERT_EQ(e.message(), std::string("nanos must >= -3020399999999000 and <= 3020399999999000")); + } + catch (...) + { + FAIL() << "Expected overflow"; + }; + + ColumnWithTypeAndName input3( + createColumn>({-(838 * 3600 + 59 * 60 + 59) * 1000000000L - 999999000L - 1000L}).column, + makeNullable(std::make_shared(6)), + "result"); + try + { + auto result = executeFunction("tidbTimeToSec", input3); + FAIL() << "Expected overflow"; + } + catch (DB::Exception & e) + { + ASSERT_EQ(e.message(), std::string("nanos must >= -3020399999999000 and <= 3020399999999000")); + } + catch (...) + { + FAIL() << "Expected overflow"; + }; + + // Random Test + constexpr int rowNum = 1000; + auto dur_column = ColumnVector::create(); + auto & dur_data = dur_column->getData(); + auto second_column = ColumnVector::create(); + auto & second_data = second_column->getData(); + dur_data.resize(rowNum); + second_data.resize(rowNum); + + std::random_device rd; + std::default_random_engine gen = std::default_random_engine(rd()); + std::uniform_int_distribution sign_dis(0, 1), hour_dis(0, 838), minute_dis(0, 59), second_dis(0, 59), microSecond_dis(0, 999999); + for (int i = 0; i < rowNum; ++i) + { + auto sign = (sign_dis(gen) == 0) ? 1 : -1; + auto hour = hour_dis(gen); + auto minute = minute_dis(gen); + auto second = second_dis(gen); + auto microSecond = microSecond_dis(gen); + dur_data[i] = sign * ((hour * 3600 + minute * 60 + second) * 1000000000L + microSecond * 1000L); + second_data[i] = sign * (hour * 3600 + minute * 60 + second); + } + + ColumnWithTypeAndName input4(std::move(dur_column), std::make_shared(6), "duration"); + ColumnWithTypeAndName second_out(std::move(second_column), std::make_shared(), "time_to_sec"); + ASSERT_COLUMN_EQ(second_out, executeFunction("tidbTimeToSec", input4)); +} +CATCH } // namespace tests } // namespace DB \ No newline at end of file diff --git a/dbms/src/Functions/tests/gtest_get_format.cpp b/dbms/src/Functions/tests/gtest_get_format.cpp new file mode 100644 index 00000000000..61a8d80e7b4 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_get_format.cpp @@ -0,0 +1,153 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
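+
+// Unit tests for the getFormat function (MySQL GET_FORMAT pushdown): boundary cases for
+// const/vector argument combinations, plus every supported time_type/location pair.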
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" +#include + +#pragma GCC diagnostic pop + +namespace DB::tests +{ +class GetFormatTest : public DB::tests::FunctionTest +{ +public: + static constexpr auto funcName = "getFormat"; +}; + +TEST_F(GetFormatTest, testBoundary) +try +{ + // const(non-null), vector + // time_type is a const with non null value + // location is a vector containing null + ASSERT_COLUMN_EQ( + createColumn>({"%m.%d.%Y", {}}), + executeFunction( + funcName, + createConstColumn>(2, "DATE"), + createColumn>({"USA", {}}))); + + // const(null), vector + // time_type is a const with null value + // location is a vector containing null + ASSERT_COLUMN_EQ( + createConstColumn>(2, {}), + executeFunction( + funcName, + createConstColumn>(2, {}), + createColumn>({"USA", {}}))); + + // const(non-null), const(non-null) + // time_type is a const with non null value + // location is a const with non null value + ASSERT_COLUMN_EQ( + createConstColumn(2, "%m.%d.%Y"), + executeFunction( + funcName, + createConstColumn>(2, "DATE"), + createConstColumn>(2, "USA"))); + + // const(non-null), const(null) + // time_type is a const with non null value + // location is a const with null value + ASSERT_COLUMN_EQ( + createConstColumn>(2, {}), + executeFunction( + funcName, + createConstColumn>(2, "DATE"), + createConstColumn>(2, {}))); + + // The time_type is a system pre_defined macro, thus assume time_type column is const + // Throw an exception is time_type is not ColumnConst + ASSERT_THROW( + executeFunction( + funcName, + createColumn>({"DATE", "TIME"}), + createColumn>({"USA", {}})), + DB::Exception); +} +CATCH + +TEST_F(GetFormatTest, testMoreCases) +try +{ + // time_type: DATE + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%m.%d.%Y", "%Y-%m-%d", "%Y-%m-%d", "%d.%m.%Y", "%Y%m%d"}), + executeFunction( + funcName, + createConstColumn>(5, "DATE"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // time_type: DATETIME + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%Y-%m-%d %H.%i.%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H.%i.%s", "%Y%m%d%H%i%s"}), + executeFunction( + funcName, + createConstColumn>(5, "DATETIME"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // time_type: TIMESTAMP + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%Y-%m-%d %H.%i.%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H.%i.%s", "%Y%m%d%H%i%s"}), + executeFunction( + funcName, + createConstColumn>(5, "TIMESTAMP"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // time_type: TIME + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%h:%i:%s %p", "%H:%i:%s", "%H:%i:%s", "%H.%i.%s", "%H%i%s"}), + executeFunction( + funcName, + createConstColumn>(5, "TIME"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // the location is not in ("USA", "JIS", "ISO", "EUR", "INTERNAL") + ASSERT_COLUMN_EQ( + createColumn>({"", ""}), + executeFunction( + funcName, + createConstColumn>(2, "TIME"), + createColumn>({"CAN", ""}))); + + // the time_type is not in ("DATE", "DATETIME", "TIMESTAMP", "TIME") + ASSERT_COLUMN_EQ( + createColumn>({"", ""}), + executeFunction( + funcName, + createConstColumn>(2, "TIMEINUTC"), + createColumn>({"USA", "ISO"}))); +} +CATCH + +} // namespace DB::tests diff --git a/dbms/src/Functions/tests/gtest_strings_format.cpp 
b/dbms/src/Functions/tests/gtest_strings_format.cpp index 2d571a9bb1b..8f3b899316e 100644 --- a/dbms/src/Functions/tests/gtest_strings_format.cpp +++ b/dbms/src/Functions/tests/gtest_strings_format.cpp @@ -34,7 +34,7 @@ class StringFormat : public DB::tests::FunctionTest using FieldType = DecimalField; using NullableDecimal = Nullable; ASSERT_COLUMN_EQ( - createColumn>({"0.0000", "-0.0120", "0.0120", "12,332.1000", "12,332", "12,332", "12,332.300000000000000000000000000000", "-12,332.30000", "-1,000.0", "-333.33", {}}), + createColumn>({"0.0000", "-0.0120", "0.0120", "12,332.1000", "12,332", "12,332", "12,332.300000000000000000000000000000", "-12,332.30000", "-1,000.0", "-333.33", {}, "99,999.9999000000", "100,000.000", "100,000"}), executeFunction( func_name, createColumn( @@ -49,8 +49,11 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-123323000), 4), FieldType(static_cast(-9999999), 4), FieldType(static_cast(-3333330), 4), - FieldType(static_cast(0), 0)}), - createColumn>({4, 4, 4, 4, 0, -1, 31, 5, 1, 2, {}}))); + FieldType(static_cast(0), 0), + FieldType(static_cast(999999999), 4), + FieldType(static_cast(999999999), 4), + FieldType(static_cast(999999999), 4)}), + createColumn>({4, 4, 4, 4, 0, -1, 31, 5, 1, 2, {}, 10, 3, -5}))); ASSERT_COLUMN_EQ( createColumn>({"12,332.100", "-12,332.300", "-1,000.000", "-333.333"}), executeFunction( @@ -62,8 +65,6 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4), FieldType(static_cast(-3333330), 4)}), createConstColumn>(4, 3))); - /// known issue https://github.com/pingcap/tiflash/issues/4891 - /* ASSERT_COLUMN_EQ( createColumn>({"-999.9999", "-1,000", "-1,000", "-999.999900000000000000000000000000", "-999.99990", "-1,000.0", "-1,000.00"}), executeFunction( @@ -74,7 +75,7 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4)), createColumn>({4, 0, -1, 31, 5, 1, 2}))); ASSERT_COLUMN_EQ( - createConstColumn>(1, "-1,000.000"), + createConstColumn(1, "-1,000.000"), executeFunction( func_name, createConstColumn( @@ -82,7 +83,6 @@ class StringFormat : public DB::tests::FunctionTest 1, FieldType(static_cast(-9999999), 4)), createConstColumn>(1, 3))); - */ ASSERT_COLUMN_EQ( createColumn>({"12,332.1000", "12,332", "12,332.300000000000000000000000000000", "-12,332.30000", "-1,000.0", "-333.33", {}}), executeFunction( @@ -108,8 +108,6 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4), FieldType(static_cast(-3333330), 4)}), createConstColumn>(4, 3))); - /// known issue https://github.com/pingcap/tiflash/issues/4891 - /* ASSERT_COLUMN_EQ( createColumn>({"-999.9999", "-1,000", "-999.999900000000000000000000000000", "-999.99990", "-1,000.0", "-1,000.00"}), executeFunction( @@ -120,7 +118,7 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4)), createColumn>({4, 0, 31, 5, 1, 2}))); ASSERT_COLUMN_EQ( - createConstColumn>(1, "-1,000.000"), + createConstColumn(1, "-1,000.000"), executeFunction( func_name, createConstColumn( @@ -128,7 +126,6 @@ class StringFormat : public DB::tests::FunctionTest 1, FieldType(static_cast(-9999999), 4)), createConstColumn>(1, 3))); - */ } template diff --git a/dbms/src/Functions/tests/gtest_strings_reverse.cpp b/dbms/src/Functions/tests/gtest_strings_reverse.cpp new file mode 100644 index 00000000000..304a403db83 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_strings_reverse.cpp @@ -0,0 +1,120 @@ +// Copyright 2022 PingCAP, Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic pop + +namespace DB::tests +{ +class StringReverse : public DB::tests::FunctionTest +{ +protected: + static ColumnWithTypeAndName toVec(const std::vector & v) + { + return createColumn(v); + } + + static ColumnWithTypeAndName toNullableVec(const std::vector> & v) + { + return createColumn>(v); + } + + static ColumnWithTypeAndName toConst(const String & s) + { + return createConstColumn(1, s); + } +}; +// test reverse +TEST_F(StringReverse, stringReverseTest) +try +{ + std::vector candidate_strings = {"one week's time test", "abcdef", "abcabc", "moc.pacgnip"}; + std::vector reversed_strings = {"tset emit s'keew eno", "fedcba", "cbacba", "pingcap.com"}; + + // test vector + ASSERT_COLUMN_EQ( + toVec(reversed_strings), + executeFunction( + "reverse", + toVec(candidate_strings))); + + // test nullable + ASSERT_COLUMN_EQ( + toNullableVec({"", " ", {}, "pacgnip"}), + executeFunction( + "reverse", + toNullableVec({"", " ", {}, "pingcap"}))); + + // test const + ASSERT_COLUMN_EQ( + toConst("pacgnip"), + executeFunction( + "reverse", + toConst("pingcap"))); + + // test null + ASSERT_COLUMN_EQ( + toConst({}), + executeFunction( + "reverse", + toConst({}))); +} +CATCH + +// test reverseUTF8 +TEST_F(StringReverse, stringReverseUTF8Test) +try +{ + std::vector candidate_strings = {"one week's time test", "abc测试def", "abcテストabc", "ѐёђѓєѕіїјљњћќѝўџ", "+ѐ-ё*ђ/ѓ!є@ѕ#і$@ї%ј……љ&њ(ћ)ќ¥ѝ#ў@џ!^", "αβγδεζηθικλμνξοπρστυφχψωσ", "▲α▼βγ➨δε☎ζη✂θι€κλ♫μν✓ξο✚πρ℉στ♥υφ♖χψ♘ω★σ✕", "թփձջրչճժծքոեռտըւիօպասդֆգհյկլխզղցվբնմշ"}; + std::vector reversed_strings = {"tset emit s'keew eno", "fed试测cba", "cbaトステcba", "џўѝќћњљјїіѕєѓђёѐ", "^!џ@ў#ѝ¥ќ)ћ(њ&љ……ј%ї@$і#ѕ@є!ѓ/ђ*ё-ѐ+", "σωψχφυτσρποξνμλκιθηζεδγβα", "✕σ★ω♘ψχ♖φυ♥τσ℉ρπ✚οξ✓νμ♫λκ€ιθ✂ηζ☎εδ➨γβ▼α▲", "շմնբվցղզխլկյհգֆդսապօիւըտռեոքծժճչրջձփթ"}; + + // test vector + ASSERT_COLUMN_EQ( + toVec(reversed_strings), + executeFunction( + "reverseUTF8", + toVec(candidate_strings))); + + // test nullable + ASSERT_COLUMN_EQ( + toNullableVec({"", " ", {}, "pacgnip"}), + executeFunction( + "reverseUTF8", + toNullableVec({"", " ", {}, "pingcap"}))); + + // test const + ASSERT_COLUMN_EQ( + toConst("pacgnip"), + executeFunction( + "reverseUTF8", + toConst("pingcap"))); + + // test null + ASSERT_COLUMN_EQ( + toConst({}), + executeFunction( + "reverseUTF8", + toConst({}))); +} +CATCH + +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/IO/WriteBuffer.h b/dbms/src/IO/WriteBuffer.h index 361081d1176..0c0fa2cb545 100644 --- a/dbms/src/IO/WriteBuffer.h +++ b/dbms/src/IO/WriteBuffer.h @@ -96,6 +96,24 @@ class WriteBuffer : public BufferBase } } + template + __attribute__((always_inline)) void writeFixed(const T * __restrict from) + { + if (likely(working_buffer.end() - pos >= static_cast(sizeof(T)))) + { + tiflash_compiler_builtin_memcpy(pos, from, sizeof(T)); + pos += sizeof(T); + } + else + { + [&]() __attribute__((noinline)) + { + 
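+            // Slow path, deliberately kept out of line via the noinline lambda: fewer than
+            // sizeof(T) bytes remain in the working buffer, so fall back to the generic
+            // write(), which can cross the buffer boundary.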
write(reinterpret_cast(from), sizeof(T)); + } + (); + } + } + inline void write(char x) { diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 6a39bc333a8..6cb947a1bfa 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,11 @@ extern const int CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS; extern const int LOGICAL_ERROR; } // namespace ErrorCodes +namespace FailPoints +{ +extern const char random_aggregate_create_state_failpoint[]; +extern const char random_aggregate_merge_failpoint[]; +} // namespace FailPoints AggregatedDataVariants::~AggregatedDataVariants() { @@ -317,6 +323,7 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const * In order that then everything is properly destroyed, we "roll back" some of the created states. * The code is not very convenient. */ + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_aggregate_create_state_failpoint); aggregate_functions[j]->create(aggregate_data + offsets_of_aggregate_states[j]); } catch (...) @@ -1504,6 +1511,8 @@ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream if (current_bucket_num >= NUM_BUCKETS) return {}; + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_aggregate_merge_failpoint); + AggregatedDataVariantsPtr & first = data[0]; if (current_bucket_num == -1) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 44699a324f4..7cd0cb5ad53 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -68,9 +68,9 @@ #include #include -#include #include #include +#include namespace ProfileEvents @@ -78,8 +78,6 @@ namespace ProfileEvents extern const Event ContextLock; } -#include - namespace CurrentMetrics { extern const Metric GlobalStorageRunMode; @@ -1440,20 +1438,32 @@ void Context::dropCaches() const shared->mark_cache->reset(); } -BackgroundProcessingPool & Context::getBackgroundPool() +BackgroundProcessingPool & Context::initializeBackgroundPool(UInt16 pool_size) { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool = std::make_shared(settings.background_pool_size); + shared->background_pool = std::make_shared(pool_size); return *shared->background_pool; } -BackgroundProcessingPool & Context::getBlockableBackgroundPool() +BackgroundProcessingPool & Context::getBackgroundPool() +{ + auto lock = getLock(); + return *shared->background_pool; +} + +BackgroundProcessingPool & Context::initializeBlockableBackgroundPool(UInt16 pool_size) { - // TODO: choose a better thread pool size and maybe a better name for the pool auto lock = getLock(); if (!shared->blockable_background_pool) - shared->blockable_background_pool = std::make_shared(settings.background_pool_size); + shared->blockable_background_pool = std::make_shared(pool_size); + return *shared->blockable_background_pool; +} + +BackgroundProcessingPool & Context::getBlockableBackgroundPool() +{ + // TODO: maybe a better name for the pool + auto lock = getLock(); return *shared->blockable_background_pool; } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index b6e759e364b..7663b40f612 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -379,7 +379,9 @@ class Context void setUseL0Opt(bool use_l0_opt); bool useL0Opt() const; + BackgroundProcessingPool & initializeBackgroundPool(UInt16 pool_size); 
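For reference, the `writeFixed` helper added to WriteBuffer.h above is a textbook fast-path/slow-path split: the in-bounds case is forced inline and compiles down to a constant-size copy, while the overflow case is pushed into a `noinline` lambda so it does not bloat every call site. A minimal standalone sketch of the same pattern, assuming a plain fixed-size buffer and `__builtin_memcpy` in place of `tiflash_compiler_builtin_memcpy`:

```cpp
#include <cstddef>
#include <cstring>

struct TinyWriteBuffer
{
    char data[256];
    char * pos = data;
    char * end = data + sizeof(data);

    // Stand-in for the real out-of-line write(): it would flush `data` and retry.
    void writeSlow(const char * from, size_t size)
    {
        (void)from;
        (void)size; // elided in this sketch
    }

    template <typename T>
    __attribute__((always_inline)) void writeFixed(const T * __restrict from)
    {
        if (__builtin_expect(end - pos >= static_cast<std::ptrdiff_t>(sizeof(T)), 1))
        {
            // Hot path: sizeof(T) is a compile-time constant, so the copy
            // lowers to a handful of register moves.
            __builtin_memcpy(pos, from, sizeof(T));
            pos += sizeof(T);
        }
        else
        {
            // Cold path: the noinline lambda keeps this branch out of the
            // inlined fast path.
            [&]() __attribute__((noinline)) {
                writeSlow(reinterpret_cast<const char *>(from), sizeof(T));
            }();
        }
    }
};
```

The `noinline` on the fallback matters precisely because `writeFixed` itself is `always_inline`: without it, the rarely taken slow path would be copied into every caller as well.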
BackgroundProcessingPool & getBackgroundPool(); + BackgroundProcessingPool & initializeBlockableBackgroundPool(UInt16 pool_size); BackgroundProcessingPool & getBlockableBackgroundPool(); void createTMTContext(const TiFlashRaftConfig & raft_config, pingcap::ClusterConfig && cluster_config); @@ -505,7 +507,7 @@ class DDLGuard class SessionCleaner { public: - SessionCleaner(Context & context_) + explicit SessionCleaner(Context & context_) : context{context_} {} ~SessionCleaner(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 01e8625f943..3514f915626 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -93,6 +94,12 @@ extern const int SCHEMA_VERSION_ERROR; extern const int UNKNOWN_EXCEPTION; } // namespace ErrorCodes + +namespace FailPoints +{ +extern const char pause_query_init[]; +} // namespace FailPoints + InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, @@ -131,6 +138,15 @@ InterpreterSelectQuery::~InterpreterSelectQuery() = default; void InterpreterSelectQuery::init(const Names & required_result_column_names) { + /// The failpoint pause_query_init should be used together with the failpoint unblock_query_init_after_write, + /// so that the select query is blocked before its init stage until the write action has finished. + /// In tests, we need to enable unblock_query_init_after_write first, + /// and enable pause_query_init before each write statement takes effect. + /// When the write action finishes, pause_query_init is disabled automatically, + /// and then the select query can continue. + /// You can refer to multi_alter_with_write.test for an example. + FAIL_POINT_PAUSE(FailPoints::pause_query_init); + if (!context.hasQueryContext()) context.setQueryContext(context); @@ -496,13 +512,13 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt { const auto & join = static_cast(*query.join()->table_join); if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) - pipeline.stream_with_non_joined_data = expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( + pipeline.streams_with_non_joined_data.push_back(expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( pipeline.firstStream()->getHeader(), 0, 1, - settings.max_block_size); + settings.max_block_size)); - for (auto & stream : pipeline.streams) /// Applies to all sources except stream_with_non_joined_data. + for (auto & stream : pipeline.streams) /// Applies to all sources except streams_with_non_joined_data. stream = std::make_shared(stream, expressions.before_join, /*req_id=*/""); } @@ -587,7 +603,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (need_second_distinct_pass || query.limit_length || query.limit_by_expression_list - || pipeline.stream_with_non_joined_data) + || !pipeline.streams_with_non_joined_data.empty()) { need_merge_streams = true; } @@ -971,11 +987,11 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre Aggregator::Params params(header, keys, aggregates, overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode, allow_to_use_two_level_group_by ?
settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, context.getTemporaryPath()); /// If there are several sources, then we perform parallel aggregation - if (pipeline.streams.size() > 1) + if (pipeline.streams.size() > 1 || pipeline.streams_with_non_joined_data.size() > 1) { - pipeline.firstStream() = std::make_shared( + auto stream = std::make_shared( pipeline.streams, - pipeline.stream_with_non_joined_data, + pipeline.streams_with_non_joined_data, params, file_provider, final, @@ -985,19 +1001,21 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre : static_cast(settings.max_threads), /*req_id=*/""); - pipeline.stream_with_non_joined_data = nullptr; pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); } else { BlockInputStreams inputs; if (!pipeline.streams.empty()) inputs.push_back(pipeline.firstStream()); - else - pipeline.streams.resize(1); - if (pipeline.stream_with_non_joined_data) - inputs.push_back(pipeline.stream_with_non_joined_data); + if (!pipeline.streams_with_non_joined_data.empty()) + inputs.push_back(pipeline.streams_with_non_joined_data.at(0)); + + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); pipeline.firstStream() = std::make_shared( std::make_shared(inputs, /*req_id=*/""), params, file_provider, final, /*req_id=*/""); - - pipeline.stream_with_non_joined_data = nullptr; } } @@ -1228,21 +1244,32 @@ void InterpreterSelectQuery::executeDistinct(Pipeline & pipeline, bool before_or void InterpreterSelectQuery::executeUnion(Pipeline & pipeline) { - /// If there are still several streams, then we combine them into one - if (pipeline.hasMoreThanOneStream()) + switch (pipeline.streams.size() + pipeline.streams_with_non_joined_data.size()) { - pipeline.firstStream() = std::make_shared>( + case 0: + break; + case 1: + { + if (pipeline.streams.size() == 1) + break; + // streams_with_non_joined_data's size is 1. + pipeline.streams.push_back(pipeline.streams_with_non_joined_data.at(0)); + pipeline.streams_with_non_joined_data.clear(); + break; + } + default: + { + BlockInputStreamPtr stream = std::make_shared>( pipeline.streams, - pipeline.stream_with_non_joined_data, + pipeline.streams_with_non_joined_data, max_streams, /*req_id=*/""); - pipeline.stream_with_non_joined_data = nullptr; + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); + break; } - else if (pipeline.stream_with_non_joined_data) - { - pipeline.streams.push_back(pipeline.stream_with_non_joined_data); - pipeline.stream_with_non_joined_data = nullptr; } } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 474ace7ee84..d1bcec2a3dd 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -95,7 +95,7 @@ class InterpreterSelectQuery * It has a special meaning, since reading from it should be done after reading from the main streams. * It is appended to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream.
*/ - BlockInputStreamPtr stream_with_non_joined_data; + BlockInputStreams streams_with_non_joined_data; BlockInputStreamPtr & firstStream() { return streams.at(0); } @@ -105,13 +105,13 @@ class InterpreterSelectQuery : public IInterpreter for (auto & stream : streams) transform(stream); - if (stream_with_non_joined_data) - transform(stream_with_non_joined_data); + for (auto & stream : streams_with_non_joined_data) + transform(stream); } bool hasMoreThanOneStream() const { - return streams.size() + (stream_with_non_joined_data ? 1 : 0) > 1; + return streams.size() + streams_with_non_joined_data.size() > 1; } }; diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 5e73b1e5f3e..076c290cc9d 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -224,7 +224,7 @@ BlockIO InterpreterSelectWithUnionQuery::execute() } else { - result_stream = std::make_shared>(nested_streams, nullptr, settings.max_threads, /*req_id=*/""); + result_stream = std::make_shared>(nested_streams, BlockInputStreams{}, settings.max_threads, /*req_id=*/""); nested_streams.clear(); } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 820618a6e8b..181ebcaaa64 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -26,9 +27,17 @@ #include #include #include +#include + namespace DB { +namespace FailPoints +{ +extern const char random_join_build_failpoint[]; +extern const char random_join_prob_failpoint[]; +} // namespace FailPoints + namespace ErrorCodes { extern const int UNKNOWN_SET_DATA_VARIANT; @@ -621,6 +630,7 @@ void NO_INLINE insertFromBlockImplTypeCaseWithLock( } for (size_t insert_index = 0; insert_index < segment_index_info.size(); insert_index++) { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_build_failpoint); size_t segment_index = (insert_index + stream_index) % segment_index_info.size(); if (segment_index == segment_size) { @@ -1513,7 +1523,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const default: throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); } - + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) { const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 9361e0525d2..add761c581d 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -209,7 +209,7 @@ struct Settings * Basically, limits are checked for each block (not every row). That is, the limits can be slightly violated. \ * Almost all limits apply only to SELECTs. \ * Almost all limits apply to each stream individually. \ - */ \ + */ \ \ M(SettingUInt64, max_rows_to_read, 0, "Limit on read rows from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it " \ "is only checked on a remote server.") \ @@ -272,7 +272,7 @@ struct Settings M(SettingUInt64, dt_segment_delta_small_column_file_size, 8388608, "Determine whether a column file in delta is small or not. 
8MB by default.") \ M(SettingUInt64, dt_segment_stable_pack_rows, DEFAULT_MERGE_BLOCK_SIZE, "Expected stable pack rows in DeltaTree Engine.") \ M(SettingFloat, dt_segment_wait_duration_factor, 1, "The factor of wait duration in a write stall.") \ - M(SettingUInt64, dt_bg_gc_check_interval, 60, "Background gc thread check interval, the unit is second.") \ + M(SettingUInt64, dt_bg_gc_check_interval, 60, "Background gc thread check interval, the unit is second.") \ M(SettingInt64, dt_bg_gc_max_segments_to_check_every_round, 100, "Max segments to check in every gc round, value less than or equal to 0 means gc no segments.") \ M(SettingFloat, dt_bg_gc_ratio_threhold_to_trigger_gc, 1.2, "Trigger segment's gc when the ratio of invalid version exceed this threhold. Values smaller than or equal to 1.0 means gc all " \ "segments") \ @@ -355,15 +355,15 @@ struct Settings M(SettingUInt64, elastic_threadpool_init_cap, 400, "The size of elastic thread pool.") \ M(SettingUInt64, elastic_threadpool_shrink_period_ms, 300000, "The shrink period(ms) of elastic thread pool.") \ M(SettingBool, enable_local_tunnel, true, "Enable local data transfer between local MPP tasks.") \ - M(SettingBool, enable_async_grpc_client, true, "Enable async grpc in MPP.") \ - M(SettingUInt64, grpc_completion_queue_pool_size, 0, "The size of gRPC completion queue pool. 0 means using hardware_concurrency.")\ + M(SettingBool, enable_async_grpc_client, true, "Enable async grpc in MPP.") \ + M(SettingUInt64, grpc_completion_queue_pool_size, 0, "The size of gRPC completion queue pool. 0 means using hardware_concurrency.") \ M(SettingBool, enable_async_server, true, "Enable async rpc server.") \ M(SettingUInt64, async_pollers_per_cq, 200, "grpc async pollers per cqs") \ M(SettingUInt64, async_cqs, 1, "grpc async cqs") \ M(SettingUInt64, preallocated_request_count_per_poller, 20, "grpc preallocated_request_count_per_poller") \ \ M(SettingUInt64, manual_compact_pool_size, 1, "The number of worker threads to handle manual compact requests.") \ - M(SettingUInt64, manual_compact_max_concurrency, 10, "Max concurrent tasks. It should be larger than pool size.") \ + M(SettingUInt64, manual_compact_max_concurrency, 10, "Max concurrent tasks. It should be larger than pool size.") \ M(SettingUInt64, manual_compact_more_until_ms, 60000, "Continuously compact more segments until reaching specified elapsed time. If 0 is specified, only one segment will be compacted each round.") // clang-format on diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 96cfc0a58ae..78ad4b41ce6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
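The failpoints wired into Aggregator.cpp and Join.cpp above, and into executeQueryImpl below, all share one mechanism: a named switch that is looked up at a hand-picked call site and throws when a test has enabled it. The project builds this on libfiu through the `FAIL_POINT_TRIGGER_EXCEPTION` / `FAIL_POINT_PAUSE` macro family; the sketch below is a simplified, hypothetical registry that only shows the shape of the technique, not the real implementation:

```cpp
#include <stdexcept>
#include <string>
#include <unordered_set>

// Hypothetical stand-in for FailPointHelper + libfiu.
class FailPointRegistry
{
public:
    static FailPointRegistry & instance()
    {
        static FailPointRegistry registry;
        return registry;
    }
    void enable(const std::string & name) { enabled.insert(name); }
    void disable(const std::string & name) { enabled.erase(name); }
    bool isEnabled(const std::string & name) const { return enabled.count(name) > 0; }

private:
    std::unordered_set<std::string> enabled;
};

// Throws at the instrumented call site when the named failpoint is on.
#define SKETCH_FAIL_POINT_TRIGGER_EXCEPTION(name)                                         \
    do                                                                                    \
    {                                                                                     \
        if (FailPointRegistry::instance().isEnabled(name))                                \
            throw std::runtime_error(std::string("Fail point ") + (name) + " triggered"); \
    } while (false)

void mergeAggregatedData()
{
    SKETCH_FAIL_POINT_TRIGGER_EXCEPTION("random_aggregate_merge_failpoint");
    // ... the actual merge work would run here ...
}
```

Enabling the `random_*` failpoints from the server config (see `FailPointHelper::initRandomFailPoints` in Server.cpp below) lets CI inject failures into aggregation, join build, and interpreter creation without recompiling.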
+#include #include #include #include @@ -53,7 +54,10 @@ extern const int LOGICAL_ERROR; extern const int QUERY_IS_TOO_LARGE; extern const int INTO_OUTFILE_NOT_ALLOWED; } // namespace ErrorCodes - +namespace FailPoints +{ +extern const char random_interpreter_failpoint[]; +} // namespace FailPoints namespace { void checkASTSizeLimits(const IAST & ast, const Settings & settings) @@ -226,6 +230,7 @@ std::tuple executeQueryImpl( context.setProcessListElement(&process_list_entry->get()); } + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_interpreter_failpoint); auto interpreter = query_src.interpreter(context, stage); res = interpreter->execute(); diff --git a/dbms/src/Server/CLIService.h b/dbms/src/Server/CLIService.h index 9078fa991f3..0acffebb577 100644 --- a/dbms/src/Server/CLIService.h +++ b/dbms/src/Server/CLIService.h @@ -126,6 +126,8 @@ CLIService::TiFlashProxyConfig::TiFlashProxyConfig(Poco::Util::Layer args.push_back(v.first.data()); args.push_back(v.second.data()); } + // Start the decryption service without starting the raftstore service + args.push_back("--only-decryption"); is_proxy_runnable = true; } template diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 1bb35e51866..aabca11cf9c 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -53,10 +54,15 @@ #include #include #include +#include +#include +#include #include #include #include +#include #include +#include #include #include #include @@ -81,12 +87,6 @@ #include #include -#include "HTTPHandlerFactory.h" -#include "MetricsPrometheus.h" -#include "MetricsTransmitter.h" -#include "StatusFile.h" -#include "TCPHandlerFactory.h" - #if Poco_NetSSL_FOUND #include #include @@ -151,6 +151,7 @@ void loadMiConfig(Logger * log) } #undef TRY_LOAD_CONF #endif + namespace { [[maybe_unused]] void tryLoadBoolConfigFromEnv(Poco::Logger * log, bool & target, const char * name) @@ -176,6 +177,12 @@ namespace } } // namespace +namespace CurrentMetrics +{ +extern const Metric LogicalCPUCores; +extern const Metric MemoryCapacity; +} // namespace CurrentMetrics + namespace DB { namespace ErrorCodes @@ -184,6 +191,7 @@ extern const int NO_ELEMENTS_IN_CONFIG; extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int INVALID_CONFIG_PARAMETER; +extern const int IP_ADDRESS_NOT_ALLOWED; } // namespace ErrorCodes namespace Debug @@ -621,6 +629,10 @@ class Server::FlashGrpcServerHolder } } flash_grpc_server = builder.BuildAndStart(); + if (!flash_grpc_server) + { + throw Exception("Failed to start the gRPC server, flash.service_addr may be invalid, flash.service_addr is " + raft_config.flash_server_addr, ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + } LOG_FMT_INFO(log, "Flash grpc server listening on [{}]", raft_config.flash_server_addr); Debug::setServiceAddr(raft_config.flash_server_addr); if (enable_async_server) @@ -961,7 +973,10 @@ class Server::TcpHttpServersHolder LOG_DEBUG(log, debug_msg); } - const std::vector> & getServers() const { return servers; } + const std::vector> & getServers() const + { + return servers; + } private: Server & server; @@ -977,6 +992,7 @@ int Server::main(const std::vector & /*args*/) Poco::Logger * log = &logger(); #ifdef FIU_ENABLE fiu_init(0); // init failpoint + FailPointHelper::initRandomFailPoints(config(), log); #endif UpdateMallocConfig(log); @@ -996,7 +1012,6 @@ int Server::main(const std::vector & /*args*/) #ifdef TIFLASH_ENABLE_SVE_SUPPORT
tryLoadBoolConfigFromEnv(log, simd_option::ENABLE_SVE, "TIFLASH_ENABLE_SVE"); #endif - registerFunctions(); registerAggregateFunctions(); registerWindowFunctions(); @@ -1128,6 +1143,12 @@ int Server::main(const std::vector & /*args*/) global_context->getPathCapacity(), global_context->getFileProvider()); + /// Initialize the background & blockable background thread pool. + Settings & settings = global_context->getSettingsRef(); + LOG_FMT_INFO(log, "Background & Blockable Background pool size: {}", settings.background_pool_size); + auto & bg_pool = global_context->initializeBackgroundPool(settings.background_pool_size); + auto & blockable_bg_pool = global_context->initializeBlockableBackgroundPool(settings.background_pool_size); + global_context->initializePageStorageMode(global_context->getPathPool(), STORAGE_FORMAT_CURRENT.page); global_context->initializeGlobalStoragePoolIfNeed(global_context->getPathPool()); LOG_FMT_INFO(log, "Global PageStorage run mode is {}", static_cast(global_context->getPageStorageRunMode())); @@ -1244,13 +1265,6 @@ int Server::main(const std::vector & /*args*/) /// Load global settings from default_profile and system_profile. /// It internally depends on UserConfig::parseSettings. global_context->setDefaultProfiles(config()); - Settings & settings = global_context->getSettingsRef(); - - /// Initialize the background thread pool. - /// It internally depends on settings.background_pool_size, - /// so must be called after settings has been load. - auto & bg_pool = global_context->getBackgroundPool(); - auto & blockable_bg_pool = global_context->getBlockableBackgroundPool(); /// Initialize RateLimiter. global_context->initializeRateLimiter(config(), bg_pool, blockable_bg_pool); @@ -1417,6 +1431,8 @@ int Server::main(const std::vector & /*args*/) { // on ARM processors it can show only enabled at current moment cores + CurrentMetrics::set(CurrentMetrics::LogicalCPUCores, server_info.cpu_info.logical_cores); + CurrentMetrics::set(CurrentMetrics::MemoryCapacity, server_info.memory_info.capacity); LOG_FMT_INFO( log, "Available RAM = {}; physical cores = {}; logical cores = {}.", diff --git a/dbms/src/Storages/BackgroundProcessingPool.h b/dbms/src/Storages/BackgroundProcessingPool.h index 1ba6c4efcf8..49a01b3a397 100644 --- a/dbms/src/Storages/BackgroundProcessingPool.h +++ b/dbms/src/Storages/BackgroundProcessingPool.h @@ -81,7 +81,7 @@ class BackgroundProcessingPool using TaskHandle = std::shared_ptr; - BackgroundProcessingPool(int size_); + explicit BackgroundProcessingPool(int size_); size_t getNumberOfThreads() const { return size; } @@ -96,7 +96,7 @@ class BackgroundProcessingPool /// 2. thread B also get the same task /// 3. thread A finish the execution of the task quickly, release the task and try to update the next schedule time of the task /// 4. 
thread B find the task is not occupied and execute the task again almost immediately - TaskHandle addTask(const Task & task, const bool multi = true, const size_t interval_ms = 0); + TaskHandle addTask(const Task & task, bool multi = true, size_t interval_ms = 0); void removeTask(const TaskHandle & task); ~BackgroundProcessingPool(); diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp index 132732d6989..8a69b7573e2 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp @@ -141,6 +141,19 @@ bool DeltaValueSpace::ingestColumnFiles(DMContext & /*context*/, const RowKeyRan bool DeltaValueSpace::flush(DMContext & context) { + bool v = false; + if (!is_flushing.compare_exchange_strong(v, true)) + { + // Another thread is flushing, just return. + LOG_FMT_DEBUG(log, "{}, Flush stop because other thread is flushing", simpleInfo()); + return false; + } + SCOPE_EXIT({ + bool v = true; + if (!is_flushing.compare_exchange_strong(v, false)) + throw Exception(simpleInfo() + " is expected to be flushing", ErrorCodes::LOGICAL_ERROR); + }); + LOG_FMT_DEBUG(log, "{}, Flush start", info()); /// We have two types of data needed to flush to disk: diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h index 8f14682caa8..04fb97b3004 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h @@ -77,6 +77,11 @@ class DeltaValueSpace /// Note that those things can not be done at the same time. std::atomic_bool is_updating = false; + /// Note that it's safe to run multiple flushes concurrently, but only one of them can succeed + /// and the other threads' work is just a waste of resources. + /// So we only allow one flush task to run at any time, to avoid wasting resources. + std::atomic_bool is_flushing = false; + std::atomic last_try_flush_rows = 0; std::atomic last_try_flush_bytes = 0; std::atomic last_try_compact_column_files = 0; @@ -159,6 +164,8 @@ class DeltaValueSpace size_t getTotalCacheBytes() const; size_t getValidCacheRows() const; + bool isFlushing() const { return is_flushing; } + bool isUpdating() const { return is_updating; } bool tryLockUpdating() diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 195ed5c53c2..935a4ac111c 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -100,6 +100,9 @@ extern const char exception_after_drop_segment[]; namespace DM { +// It is used to prevent hash conflict of file caches. +static std::atomic DELTA_MERGE_STORE_HASH_SALT{0}; + // ================================================ // MergeDeltaTaskPool // ================================================ @@ -980,14 +983,14 @@ void DeltaMergeStore::deleteRange(const Context & db_context, const DB::Settings checkSegmentUpdate(dm_context, segment, ThreadType::Write); } -void DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRange & range) +bool DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRange & range, bool try_until_succeed) { RowKeyRange cur_range = range; while (!cur_range.none()) { RowKeyRange segment_range; - // Keep trying until succeeded. + // Keep trying until succeeded if needed.
while (true) { SegmentPtr segment; @@ -1010,10 +1013,15 @@ void DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRa { break; } + else if (!try_until_succeed) + { + return false; + } } cur_range.setStart(segment_range.end); } + return true; } void DeltaMergeStore::mergeDeltaAll(const Context & context) @@ -1055,6 +1063,13 @@ std::optional DeltaMergeStore::mergeDeltaBySegment(const Contex segment = segment_it->second; } + if (!segment->flushCache(*dm_context)) + { + // If the flush failed, it means there are parallel updates to the segment in the background. + // In this case, we try again. + continue; + } + const auto new_segment = segmentMergeDelta(*dm_context, segment, run_thread); if (new_segment) { @@ -1347,6 +1362,12 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const && (delta_rows - delta_last_try_flush_rows >= delta_cache_limit_rows || delta_bytes - delta_last_try_flush_bytes >= delta_cache_limit_bytes); bool should_foreground_flush = unsaved_rows >= delta_cache_limit_rows * 3 || unsaved_bytes >= delta_cache_limit_bytes * 3; + /// For the write thread, we want to avoid foreground flushes blocking the processing of raft commands. + /// So we increase the foreground flush threshold for the write thread. + if (thread_type == ThreadType::Write) + { + should_foreground_flush = unsaved_rows >= delta_cache_limit_rows * 10 || unsaved_bytes >= delta_cache_limit_bytes * 10; + } bool should_background_merge_delta = ((delta_check_rows >= delta_limit_rows || delta_check_bytes >= delta_limit_bytes) // && (delta_rows - delta_last_try_merge_delta_rows >= delta_cache_limit_rows @@ -1404,9 +1425,16 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const } else if (should_background_flush) { - delta_last_try_flush_rows = delta_rows; - delta_last_try_flush_bytes = delta_bytes; - try_add_background_task(BackgroundTask{TaskType::Flush, dm_context, segment, {}}); + /// It's meaningless to add more flush tasks if the segment is already flushing, + /// because only one flush task can proceed at any time. + /// After the current flush task finishes, + /// it will call `checkSegmentUpdate` again to check whether there are more flush tasks to do. + if (!segment->isFlushing()) + { + delta_last_try_flush_rows = delta_rows; + delta_last_try_flush_bytes = delta_bytes; + try_add_background_task(BackgroundTask{TaskType::Flush, dm_context, segment, {}}); + } } } @@ -1502,7 +1530,12 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const return false; }; auto try_bg_compact = [&]() { - if (should_compact) + /// The compact task should be a really low priority task, + /// and if the segment is flushing, + /// we should avoid adding a background compact task, to reduce lock contention on the segment and save disk throughput. + /// After the current flush task completes, + /// it will call `checkSegmentUpdate` again to check whether there are other kinds of tasks to do.
+ if (should_compact && !segment->isFlushing()) { delta_last_try_compact_column_files = column_file_count; try_add_background_task(BackgroundTask{TaskType::Compact, dm_context, segment, {}}); diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 705481ca107..36a72d3cda5 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -51,7 +51,7 @@ inline static const PageId DELTA_MERGE_FIRST_SEGMENT_ID = 1; struct SegmentStat { - UInt64 segment_id; + UInt64 segment_id = 0; RowKeyRange range; UInt64 rows = 0; @@ -145,9 +145,6 @@ struct DeltaMergeStoreStat UInt64 background_tasks_length = 0; }; -// It is used to prevent hash conflict of file caches. -static std::atomic DELTA_MERGE_STORE_HASH_SALT{0}; - class DeltaMergeStore : private boost::noncopyable { public: @@ -367,14 +364,14 @@ class DeltaMergeStore : private boost::noncopyable const SegmentIdSet & read_segments = {}, size_t extra_table_id_index = InvalidColumnID); - /// Force flush all data to disk. - void flushCache(const Context & context, const RowKeyRange & range) + /// Try to flush all data in `range` to disk and return whether the task succeeds. + bool flushCache(const Context & context, const RowKeyRange & range, bool try_until_succeed = true) { auto dm_context = newDMContext(context, context.getSettingsRef()); - flushCache(dm_context, range); + return flushCache(dm_context, range, try_until_succeed); } - void flushCache(const DMContextPtr & dm_context, const RowKeyRange & range); + bool flushCache(const DMContextPtr & dm_context, const RowKeyRange & range, bool try_until_succeed = true); /// Merge delta into the stable layer for all segments. /// @@ -386,7 +383,7 @@ class DeltaMergeStore : private boost::noncopyable /// If there is no segment found by the start key, nullopt is returned. /// /// This function is called when using `ALTER TABLE [TABLE] COMPACT ...` from TiDB. - std::optional mergeDeltaBySegment(const Context & context, const DM::RowKeyValue & start_key, const TaskRunThread run_thread); + std::optional mergeDeltaBySegment(const Context & context, const DM::RowKeyValue & start_key, TaskRunThread run_thread); /// Compact the delta layer, merging multiple fragmented delta files into larger ones. /// This is a minor compaction as it does not merge the delta into stable layer.
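The `is_flushing` flag introduced in DeltaValueSpace above is a classic single-flight guard: the first thread to win the compare-and-swap performs the flush, every other thread returns immediately, and the flag is restored on scope exit. The scheduling code in DeltaMergeStore.cpp then consults the same flag through `segment->isFlushing()` to avoid queueing redundant flush and compact tasks. A condensed sketch of the guard, assuming a bare `std::atomic<bool>` and a small RAII helper in place of `SCOPE_EXIT`:

```cpp
#include <atomic>
#include <cstdio>

std::atomic<bool> is_flushing{false};

bool flushOnce()
{
    bool expected = false;
    // Only one thread can switch the flag from false to true.
    if (!is_flushing.compare_exchange_strong(expected, true))
        return false; // another thread is flushing; our work would be wasted

    // RAII reset so the flag is cleared on every exit path, like SCOPE_EXIT.
    struct Reset
    {
        std::atomic<bool> & flag;
        ~Reset() { flag.store(false); }
    } reset{is_flushing};

    std::puts("performing the actual flush");
    return true;
}
```

Because losing threads return `false` instead of blocking, `flushCache` can report failure when `try_until_succeed` is false, and callers such as `mergeDeltaBySegment` simply retry in their own loop.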
diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.h b/dbms/src/Storages/DeltaMerge/DeltaTree.h index 47674ab2cfc..29e127fe35f 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaTree.h +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -810,6 +811,20 @@ class DeltaTree template InternPtr afterNodeUpdated(T * node); +#ifdef __x86_64__ + template + InternPtr afterNodeUpdatedGeneric(T * node); + + template + InternPtr afterNodeUpdatedAVX512(T * node); + + template + InternPtr afterNodeUpdatedAVX(T * node); + + template + InternPtr afterNodeUpdatedSSE4(T * node); +#endif + inline void afterLeafUpdated(LeafPtr leaf) { if (leaf->count == 0 && isRootOnly()) @@ -1348,158 +1363,86 @@ typename DT_CLASS::InterAndSid DT_CLASS::submitMinSid(T * node, UInt64 subtree_m } } -DT_TEMPLATE -template -typename DT_CLASS::InternPtr DT_CLASS::afterNodeUpdated(T * node) +#ifndef __x86_64__ +#define TIFLASH_DT_IMPL_NAME afterNodeUpdated +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +#else + +// generic implementation +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedGeneric +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME + +// avx512 implementation +TIFLASH_BEGIN_AVX512_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedAVX512 +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +// avx implementation +TIFLASH_BEGIN_AVX_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedAVX +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +// sse4 implementation +TIFLASH_BEGIN_SSE4_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedSSE4 +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +namespace Impl { - if (!node) - return {}; - - constexpr bool is_leaf = std::is_same::value; +enum class DeltaTreeVariant +{ + Generic, + SSE4, + AVX, + AVX512 +}; - if (root == asNode(node) && !isLeaf(root) && node->count == 1) +static inline DeltaTreeVariant resolveDeltaTreeVariant() +{ + if (DB::TargetSpecific::AVX512Checker::runtimeSupport()) { - /// Decrease tree height. - root = as(Intern, root)->children[0]; - - --(node->count); - freeNode(node); - - if (isLeaf(root)) - as(Leaf, root)->parent = nullptr; - else - as(Intern, root)->parent = nullptr; - --height; - - LOG_FMT_TRACE(log, "height {} -> {}", (height + 1), height); - - return {}; + return DeltaTreeVariant::AVX512; } - - auto parent = node->parent; - bool parent_updated = false; - - if (T::overflow(node->count)) // split + if (DB::TargetSpecific::AVXChecker::runtimeSupport()) { - if (!parent) - { - /// Increase tree height. 
- parent = createNode(); - root = asNode(parent); - - parent->deltas[0] = checkDelta(node->getDelta()); - parent->children[0] = asNode(node); - ++(parent->count); - parent->refreshChildParent(); - - ++height; - - LOG_FMT_TRACE(log, "height {} -> {}", (height - 1), height); - } - - auto pos = parent->searchChild(asNode(node)); - - T * next_n = createNode(); - - UInt64 sep_sid = node->split(next_n); - - // handle parent update - parent->shiftEntries(pos + 1, 1); - // for current node - parent->deltas[pos] = checkDelta(node->getDelta()); - // for next node - parent->sids[pos] = sep_sid; - parent->deltas[pos + 1] = checkDelta(next_n->getDelta()); - parent->children[pos + 1] = asNode(next_n); - - ++(parent->count); - - if constexpr (is_leaf) - { - if (as(Leaf, node) == right_leaf) - right_leaf = as(Leaf, next_n); - } - - parent_updated = true; + return DeltaTreeVariant::AVX; } - else if (T::underflow(node->count) && root != asNode(node)) // adopt or merge + if (DB::TargetSpecific::SSE4Checker::runtimeSupport()) { - auto pos = parent->searchChild(asNode(node)); - - // currently we always adopt from the right one if possible - bool is_sibling_left; - size_t sibling_pos; - T * sibling; - - if (unlikely(parent->count <= 1)) - throw Exception("Unexpected parent entry count: " + DB::toString(parent->count)); - - if (pos == parent->count - 1) - { - is_sibling_left = true; - sibling_pos = pos - 1; - sibling = as(T, parent->children[sibling_pos]); - } - else - { - is_sibling_left = false; - sibling_pos = pos + 1; - sibling = as(T, parent->children[sibling_pos]); - } - - if (unlikely(sibling->parent != node->parent)) - throw Exception("parent not the same"); - - auto after_adopt = (node->count + sibling->count) / 2; - if (T::underflow(after_adopt)) - { - // Do merge. - // adoption won't work because the sibling doesn't have enough entries. - - node->merge(sibling, is_sibling_left, pos); - freeNode(sibling); - - pos = std::min(pos, sibling_pos); - parent->deltas[pos] = checkDelta(node->getDelta()); - parent->children[pos] = asNode(node); - parent->shiftEntries(pos + 2, -1); - - if constexpr (is_leaf) - { - if (is_sibling_left && (as(Leaf, sibling) == left_leaf)) - left_leaf = as(Leaf, node); - else if (!is_sibling_left && as(Leaf, sibling) == right_leaf) - right_leaf = as(Leaf, node); - } - --(parent->count); - } - else - { - // Do adoption. 
- - auto adopt_count = after_adopt - node->count; - auto new_sep_sid = node->adopt(sibling, is_sibling_left, adopt_count, pos); + return DeltaTreeVariant::SSE4; + } + return DeltaTreeVariant::Generic; +} - parent->sids[std::min(pos, sibling_pos)] = new_sep_sid; - parent->deltas[pos] = checkDelta(node->getDelta()); - parent->deltas[sibling_pos] = checkDelta(sibling->getDelta()); - } +static inline DeltaTreeVariant DELTA_TREE_VARIANT = resolveDeltaTreeVariant(); +} // namespace Impl - parent_updated = true; - } - else if (parent) +DT_TEMPLATE +template +typename DT_CLASS::InternPtr DT_CLASS::afterNodeUpdated(T * node) +{ + switch (Impl::DELTA_TREE_VARIANT) { - auto pos = parent->searchChild(asNode(node)); - auto delta = node->getDelta(); - parent_updated = parent->deltas[pos] != delta; - parent->deltas[pos] = checkDelta(delta); + case Impl::DeltaTreeVariant::Generic: + return afterNodeUpdatedGeneric(node); + case Impl::DeltaTreeVariant::SSE4: + return afterNodeUpdatedSSE4(node); + case Impl::DeltaTreeVariant::AVX: + return afterNodeUpdatedAVX(node); + case Impl::DeltaTreeVariant::AVX512: + return afterNodeUpdatedAVX512(node); } - - if (parent_updated) - return parent; - else - return {}; } +#endif + #undef as #undef asNode diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.ipp b/dbms/src/Storages/DeltaMerge/DeltaTree.ipp new file mode 100644 index 00000000000..27b8a3b96f1 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.ipp @@ -0,0 +1,165 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +DT_TEMPLATE +template +__attribute__((noinline, flatten)) typename DT_CLASS::InternPtr DT_CLASS::TIFLASH_DT_IMPL_NAME(T * node) +{ + if (!node) + return {}; + + constexpr bool is_leaf = std::is_same::value; + + if (root == asNode(node) && !isLeaf(root) && node->count == 1) + { + /// Decrease tree height. + root = as(Intern, root)->children[0]; + + --(node->count); + freeNode(node); + + if (isLeaf(root)) + as(Leaf, root)->parent = nullptr; + else + as(Intern, root)->parent = nullptr; + --height; + + LOG_FMT_TRACE(log, "height {} -> {}", (height + 1), height); + + return {}; + } + + auto parent = node->parent; + bool parent_updated = false; + + if (T::overflow(node->count)) // split + { + if (!parent) + { + /// Increase tree height. 
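The dispatch scheme above compiles one shared implementation file (`DeltaTree.ipp`) several times under different target-specific attributes, probes the CPU once at startup, and afterwards pays only a switch on a cached enum per call. A condensed sketch of the same runtime-dispatch idea, using GCC/Clang's `__builtin_cpu_supports` in place of the `TargetSpecific` runtime checkers (function names here are illustrative, not the project's):

```cpp
enum class Variant
{
    Generic,
    AVX2
};

static Variant resolveVariant()
{
    // Probe the CPU once; the result is cached in the static below.
    return __builtin_cpu_supports("avx2") ? Variant::AVX2 : Variant::Generic;
}

static const Variant VARIANT = resolveVariant();

// Same body, compiled with AVX2 enabled so the loop may auto-vectorize.
__attribute__((target("avx2"))) static long sumAVX2(const int * p, int n)
{
    long s = 0;
    for (int i = 0; i < n; ++i)
        s += p[i];
    return s;
}

static long sumGeneric(const int * p, int n)
{
    long s = 0;
    for (int i = 0; i < n; ++i)
        s += p[i];
    return s;
}

long sum(const int * p, int n)
{
    switch (VARIANT)
    {
    case Variant::AVX2:
        return sumAVX2(p, n);
    default:
        return sumGeneric(p, n);
    }
}
```

Resolving the variant once avoids re-probing CPU features on every call, at the price of a single highly predictable branch.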
+ parent = createNode(); + root = asNode(parent); + + parent->deltas[0] = checkDelta(node->getDelta()); + parent->children[0] = asNode(node); + ++(parent->count); + parent->refreshChildParent(); + + ++height; + + LOG_FMT_TRACE(log, "height {} -> {}", (height - 1), height); + } + + auto pos = parent->searchChild(asNode(node)); + + T * next_n = createNode(); + + UInt64 sep_sid = node->split(next_n); + + // handle parent update + parent->shiftEntries(pos + 1, 1); + // for current node + parent->deltas[pos] = checkDelta(node->getDelta()); + // for next node + parent->sids[pos] = sep_sid; + parent->deltas[pos + 1] = checkDelta(next_n->getDelta()); + parent->children[pos + 1] = asNode(next_n); + + ++(parent->count); + + if constexpr (is_leaf) + { + if (as(Leaf, node) == right_leaf) + right_leaf = as(Leaf, next_n); + } + + parent_updated = true; + } + else if (T::underflow(node->count) && root != asNode(node)) // adopt or merge + { + auto pos = parent->searchChild(asNode(node)); + + // currently we always adopt from the right one if possible + bool is_sibling_left; + size_t sibling_pos; + T * sibling; + + if (unlikely(parent->count <= 1)) + throw Exception("Unexpected parent entry count: " + DB::toString(parent->count)); + + if (pos == parent->count - 1) + { + is_sibling_left = true; + sibling_pos = pos - 1; + sibling = as(T, parent->children[sibling_pos]); + } + else + { + is_sibling_left = false; + sibling_pos = pos + 1; + sibling = as(T, parent->children[sibling_pos]); + } + + if (unlikely(sibling->parent != node->parent)) + throw Exception("parent not the same"); + + auto after_adopt = (node->count + sibling->count) / 2; + if (T::underflow(after_adopt)) + { + // Do merge. + // adoption won't work because the sibling doesn't have enough entries. + + node->merge(sibling, is_sibling_left, pos); + freeNode(sibling); + + pos = std::min(pos, sibling_pos); + parent->deltas[pos] = checkDelta(node->getDelta()); + parent->children[pos] = asNode(node); + parent->shiftEntries(pos + 2, -1); + + if constexpr (is_leaf) + { + if (is_sibling_left && (as(Leaf, sibling) == left_leaf)) + left_leaf = as(Leaf, node); + else if (!is_sibling_left && as(Leaf, sibling) == right_leaf) + right_leaf = as(Leaf, node); + } + --(parent->count); + } + else + { + // Do adoption. 
+ + auto adopt_count = after_adopt - node->count; + auto new_sep_sid = node->adopt(sibling, is_sibling_left, adopt_count, pos); + + parent->sids[std::min(pos, sibling_pos)] = new_sep_sid; + parent->deltas[pos] = checkDelta(node->getDelta()); + parent->deltas[sibling_pos] = checkDelta(sibling->getDelta()); + } + + parent_updated = true; + } + else if (parent) + { + auto pos = parent->searchChild(asNode(node)); + auto delta = node->getDelta(); + parent_updated = parent->deltas[pos] != delta; + parent->deltas[pos] = checkDelta(delta); + } + + if (parent_updated) + return parent; + else + return {}; +} \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index cccfc5091b9..8058329ae91 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -300,6 +300,8 @@ class Segment : private boost::noncopyable void drop(const FileProviderPtr & file_provider, WriteBatches & wbs); + bool isFlushing() const { return delta->isFlushing(); } + RowsAndBytes getRowsAndBytesInRange( DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, diff --git a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h index b35dae0cbe2..84fafbc46ef 100644 --- a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h +++ b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h @@ -273,7 +273,8 @@ class DMTestEnv DataTypePtr pk_type = EXTRA_HANDLE_COLUMN_INT_TYPE, bool is_common_handle = false, size_t rowkey_column_size = 1, - bool with_internal_columns = true) + bool with_internal_columns = true, + bool is_deleted = false) { Block block; const size_t num_rows = (end - beg); @@ -324,7 +325,7 @@ class DMTestEnv VERSION_COLUMN_ID)); // tag_col block.insert(DB::tests::createColumn( - std::vector(num_rows, 0), + std::vector(num_rows, is_deleted), TAG_COLUMN_NAME, TAG_COLUMN_ID)); } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index d46e1b7aa36..b7913c44a2c 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -3762,6 +3762,55 @@ try CATCH +// Verify that unflushed data will also be compacted. +TEST_P(DeltaMergeStoreMergeDeltaBySegmentTest, Flush) +try +{ + { + // Write data to first 3 segments and flush. + auto newly_written_rows = helper->rows_by_segments[0] + helper->rows_by_segments[1] + helper->rows_by_segments[2]; + Block block = DMTestEnv::prepareSimpleWriteBlock(0, newly_written_rows, false, pk_type, 5 /* new tso */); + store->write(*db_context, db_context->getSettingsRef(), block); + store->flushCache(dm_context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())); + + helper->expected_delta_rows[0] += helper->rows_by_segments[0]; + helper->expected_delta_rows[1] += helper->rows_by_segments[1]; + helper->expected_delta_rows[2] += helper->rows_by_segments[2]; + helper->verifyExpectedRowsForAllSegments(); + + auto segment1 = std::next(store->segments.begin())->second; + ASSERT_EQ(segment1->getDelta()->getUnsavedRows(), 0); + } + { + // Write new data to segment[1] without flush. 
+ auto newly_written_rows = helper->rows_by_segments[1]; + Block block = DMTestEnv::prepareSimpleWriteBlock(helper->rows_by_segments[0], helper->rows_by_segments[0] + newly_written_rows, false, pk_type, 10 /* new tso */); + store->write(*db_context, db_context->getSettingsRef(), block); + + helper->expected_delta_rows[1] += helper->rows_by_segments[1]; + helper->verifyExpectedRowsForAllSegments(); + + auto segment1 = std::next(store->segments.begin())->second; + ASSERT_GT(segment1->getDelta()->getUnsavedRows(), 0); + } + { + auto segment1 = std::next(store->segments.begin())->second; + auto result = store->mergeDeltaBySegment(*db_context, segment1->getRowKeyRange().start, DeltaMergeStore::TaskRunThread::Foreground); + ASSERT_NE(result, std::nullopt); + + segment1 = std::next(store->segments.begin())->second; + ASSERT_EQ(*result, segment1->getRowKeyRange()); + + helper->expected_stable_rows[1] += helper->expected_delta_rows[1]; + helper->expected_delta_rows[1] = 0; + helper->verifyExpectedRowsForAllSegments(); + + ASSERT_EQ(segment1->getDelta()->getUnsavedRows(), 0); + } +} +CATCH + + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp new file mode 100644 index 00000000000..dc43ef3713b --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp @@ -0,0 +1,100 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace DM +{ +namespace tests +{ +class SegmentOperationTest : public SegmentTestBasic +{ +protected: + static void SetUpTestCase() {} +}; + +TEST_F(SegmentOperationTest, Issue4956) +try +{ + SegmentTestOptions options; + reloadWithOptions(options); + + // flush data so that the segment can be split.
+ writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + // write data to cache, reproduce the https://github.com/pingcap/tiflash/issues/4956 + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + deleteRangeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto segment_id = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_TRUE(segment_id.has_value()); + + mergeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, *segment_id); +} +CATCH + +TEST_F(SegmentOperationTest, TestSegment) +try +{ + SegmentTestOptions options; + reloadWithOptions(options); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + auto segment_id = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_TRUE(segment_id.has_value()); + + size_t origin_rows = getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(*segment_id); + flushSegmentCache(*segment_id); + deleteRangeSegment(*segment_id); + writeSegmentWithDeletedPack(*segment_id); + mergeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, *segment_id); + + EXPECT_EQ(getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID), origin_rows); +} +CATCH + +TEST_F(SegmentOperationTest, TestSegmentRandom) +try +{ + srand(time(nullptr)); + SegmentTestOptions options; + options.is_common_handle = true; + reloadWithOptions(options); + randomSegmentTest(100); +} +CATCH + +// run in CI weekly +TEST_F(SegmentOperationTest, DISABLED_TestSegmentRandomForCI) +try +{ + srand(time(nullptr)); + SegmentTestOptions options; + options.is_common_handle = true; + reloadWithOptions(options); + randomSegmentTest(10000); +} +CATCH + +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp new file mode 100644 index 00000000000..c676f2e08d5 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -0,0 +1,430 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
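`TestSegmentRandom` above drives the harness with `randomSegmentTest(100)`, which randomly composes the segment operations defined in gtest_segment_test_basic.cpp below. A hypothetical sketch of how such a driver loop is typically structured (the real operation list and selection logic live in `SegmentTestBasic` and are not shown in full here):

```cpp
#include <cstdlib>
#include <functional>
#include <vector>

// Hypothetical driver: each entry mutates the segment set and asserts its own
// invariants (row counts, key ranges) before returning.
struct RandomOpDriver
{
    std::vector<std::function<void()>> ops;

    void run(size_t rounds)
    {
        for (size_t i = 0; i < rounds; ++i)
            ops[static_cast<size_t>(std::rand()) % ops.size()]();
    }
};
```

Seeding with `srand(time(nullptr))`, as the tests above do, trades reproducibility for coverage; the weekly `DISABLED_TestSegmentRandomForCI` variant simply raises the round count to 10000.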
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace DM +{ +namespace tests +{ +void SegmentTestBasic::reloadWithOptions(SegmentTestOptions config) +{ + TiFlashStorageTestBasic::SetUp(); + options = config; + table_columns = std::make_shared(); + + root_segment = reload(config.is_common_handle); + ASSERT_EQ(root_segment->segmentId(), DELTA_MERGE_FIRST_SEGMENT_ID); + segments.clear(); + segments[DELTA_MERGE_FIRST_SEGMENT_ID] = root_segment; +} + +PageId SegmentTestBasic::createNewSegmentWithSomeData() +{ + SegmentPtr new_segment; + std::tie(root_segment, new_segment) = root_segment->split(dmContext(), tableColumns()); + + const size_t num_rows_write_per_batch = 100; + { + // write to segment and flush + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write_per_batch, false); + new_segment->write(dmContext(), std::move(block), true); + } + { + // write to segment and don't flush + Block block = DMTestEnv::prepareSimpleWriteBlock(num_rows_write_per_batch, 2 * num_rows_write_per_batch, false); + new_segment->write(dmContext(), std::move(block), false); + } + return new_segment->segmentId(); +} + +size_t SegmentTestBasic::getSegmentRowNumWithoutMVCC(PageId segment_id) +{ + auto segment = segments[segment_id]; + auto in = segment->getInputStreamRaw(dmContext(), *tableColumns()); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + return num_rows_read; +} + +size_t SegmentTestBasic::getSegmentRowNum(PageId segment_id) +{ + auto segment = segments[segment_id]; + auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + return num_rows_read; +} + +void SegmentTestBasic::checkSegmentRow(PageId segment_id, size_t expected_row_num) +{ + auto segment = segments[segment_id]; + // read written data + auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, expected_row_num); +} + +std::optional SegmentTestBasic::splitSegment(PageId segment_id) +{ + auto origin_segment = segments[segment_id]; + size_t origin_segment_row_num = getSegmentRowNum(segment_id); + SegmentPtr segment, new_segment; + std::tie(segment, new_segment) = origin_segment->split(dmContext(), tableColumns()); + if (new_segment) + { + segments[new_segment->segmentId()] = new_segment; + segments[segment_id] = segment; + + EXPECT_EQ(origin_segment_row_num, getSegmentRowNum(segment_id) + getSegmentRowNum(new_segment->segmentId())); + return new_segment->segmentId(); + } + return std::nullopt; +} + +void SegmentTestBasic::mergeSegment(PageId left_segment_id, PageId right_segment_id) +{ + auto left_segment = segments[left_segment_id]; + auto right_segment = segments[right_segment_id]; + + size_t left_segment_row_num = getSegmentRowNum(left_segment_id); + size_t right_segment_row_num = getSegmentRowNum(right_segment_id); + LOG_FMT_TRACE(&Poco::Logger::root(), "merge in segment:{}:{} and {}:{}", left_segment->segmentId(), left_segment_row_num, right_segment->segmentId(), right_segment_row_num); + + SegmentPtr merged_segment = Segment::merge(dmContext(), tableColumns(), 
left_segment, right_segment); + segments[merged_segment->segmentId()] = merged_segment; + auto it = segments.find(right_segment->segmentId()); + if (it != segments.end()) + { + segments.erase(it); + } + EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), left_segment_row_num + right_segment_row_num); +} + +void SegmentTestBasic::mergeSegmentDelta(PageId segment_id) +{ + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNum(segment_id); + SegmentPtr merged_segment = segment->mergeDelta(dmContext(), tableColumns()); + segments[merged_segment->segmentId()] = merged_segment; + EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), segment_row_num); +} + +void SegmentTestBasic::flushSegmentCache(PageId segment_id) +{ + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNum(segment_id); + segment->flushCache(dmContext()); + EXPECT_EQ(getSegmentRowNum(segment_id), segment_row_num); +} + +std::pair SegmentTestBasic::getSegmentKeyRange(SegmentPtr segment) +{ + Int64 start_key, end_key; + if (!options.is_common_handle) + { + start_key = segment->getRowKeyRange().getStart().int_value; + end_key = segment->getRowKeyRange().getEnd().int_value; + return {start_key, end_key}; + } + EXPECT_EQ(segment->getRowKeyRange().getStart().data[0], TiDB::CodecFlagInt); + EXPECT_EQ(segment->getRowKeyRange().getEnd().data[0], TiDB::CodecFlagInt); + { + size_t cursor = 1; + start_key = DecodeInt64(cursor, String(segment->getRowKeyRange().getStart().data, segment->getRowKeyRange().getStart().size)); + } + { + size_t cursor = 1; + end_key = DecodeInt64(cursor, String(segment->getRowKeyRange().getEnd().data, segment->getRowKeyRange().getEnd().size)); + } + return {start_key, end_key}; +} + +void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows) +{ + if (write_rows == 0) + { + return; + } + auto segment = segments[segment_id]; + size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id); + std::pair keys = getSegmentKeyRange(segment); + Int64 start_key = keys.first; + Int64 end_key = keys.second; + UInt64 remain_row_num = 0; + if (static_cast(end_key - start_key) > write_rows) + { + end_key = start_key + write_rows; + } + else + { + remain_row_num = write_rows - static_cast(end_key - start_key); + } + { + // write to segment and not flush + Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle); + segment->write(dmContext(), std::move(block), false); + LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, end_key); + version++; + } + while (remain_row_num > 0) + { + UInt64 write_num = std::min(remain_row_num, static_cast(end_key - start_key)); + Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? 
+
+void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id)
+{
+    UInt64 write_rows = DEFAULT_MERGE_BLOCK_SIZE;
+    auto segment = segments[segment_id];
+    size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id);
+    std::pair<Int64, Int64> keys = getSegmentKeyRange(segment);
+    Int64 start_key = keys.first;
+    Int64 end_key = keys.second;
+    UInt64 remain_row_num = 0;
+    if (static_cast<UInt64>(end_key - start_key) > write_rows)
+    {
+        end_key = start_key + write_rows;
+    }
+    else
+    {
+        remain_row_num = write_rows - static_cast<UInt64>(end_key - start_key);
+    }
+    {
+        // write to segment and flush
+        Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true);
+        segment->write(dmContext(), std::move(block), true);
+        LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, end_key);
+        version++;
+    }
+    while (remain_row_num > 0)
+    {
+        UInt64 write_num = std::min(remain_row_num, static_cast<UInt64>(end_key - start_key));
+        Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true);
+        segment->write(dmContext(), std::move(block), true);
+        remain_row_num -= write_num;
+        LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, write_num + start_key);
+        version++;
+    }
+    EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows);
+}
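Both write helpers above cap each batch at the segment's key span and then wrap around, bumping `version` so later passes overwrite the same handles. The chunking arithmetic in isolation, as a standalone sketch (hypothetical names, not the patch's code; it assumes `end_key > start_key`):

```cpp
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Sketch: split `write_rows` into [start, end) batches that never exceed the
// key span; later batches rewrite the same keys with a higher version,
// mirroring writeSegment()'s wrap-around loop.
std::vector<std::pair<int64_t, int64_t>> planWrites(int64_t start_key, int64_t end_key, uint64_t write_rows)
{
    std::vector<std::pair<int64_t, int64_t>> batches;
    const uint64_t span = static_cast<uint64_t>(end_key - start_key); // assumed > 0
    while (write_rows > 0)
    {
        uint64_t n = std::min(write_rows, span);
        batches.emplace_back(start_key, start_key + static_cast<int64_t>(n));
        write_rows -= n;
    }
    return batches;
}
```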
+
+void SegmentTestBasic::deleteRangeSegment(PageId segment_id)
+{
+    auto segment = segments[segment_id];
+    segment->write(dmContext(), /*delete_range*/ segment->getRowKeyRange());
+    EXPECT_EQ(getSegmentRowNum(segment_id), 0);
+}
+
+void SegmentTestBasic::writeRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start write segment:{}", random_segment_id);
+    writeSegment(random_segment_id);
+}
+void SegmentTestBasic::writeRandomSegmentWithDeletedPack()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start write segment with deleted pack:{}", random_segment_id);
+    writeSegmentWithDeletedPack(random_segment_id);
+}
+
+void SegmentTestBasic::deleteRangeRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start delete range segment:{}", random_segment_id);
+    deleteRangeSegment(random_segment_id);
+}
+
+void SegmentTestBasic::splitRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start split segment:{}", random_segment_id);
+    splitSegment(random_segment_id);
+}
+
+void SegmentTestBasic::mergeRandomSegment()
+{
+    if (segments.empty() || segments.size() == 1)
+    {
+        return;
+    }
+    std::pair<PageId, PageId> segment_pair;
+    segment_pair = getRandomMergeablePair();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start merge segment:{} and {}", segment_pair.first, segment_pair.second);
+    mergeSegment(segment_pair.first, segment_pair.second);
+}
+
+void SegmentTestBasic::mergeDeltaRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start merge delta in segment:{}", random_segment_id);
+    mergeSegmentDelta(random_segment_id);
+}
+
+void SegmentTestBasic::flushCacheRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start flush cache in segment:{}", random_segment_id);
+    flushSegmentCache(random_segment_id);
+}
+
+void SegmentTestBasic::randomSegmentTest(size_t operator_count)
+{
+    for (size_t i = 0; i < operator_count; i++)
+    {
+        auto op = static_cast<SegmentOperaterType>(random() % SegmentOperaterMax);
+        segment_operator_entries[op]();
+    }
+}
+
+PageId SegmentTestBasic::getRandomSegmentId()
+{
+    auto max_segment_id = segments.rbegin()->first;
+    PageId random_segment_id = random() % (max_segment_id + 1);
+    auto it = segments.find(random_segment_id);
+    while (it == segments.end())
+    {
+        random_segment_id = random() % (max_segment_id + 1);
+        it = segments.find(random_segment_id);
+    }
+    return random_segment_id;
+}
+
+std::pair<PageId, PageId> SegmentTestBasic::getRandomMergeablePair()
+{
+    while (true)
+    {
+        PageId random_left_segment_id = getRandomSegmentId();
+        PageId random_right_segment_id = random_left_segment_id;
+        while (random_right_segment_id == random_left_segment_id)
+        {
+            random_right_segment_id = getRandomSegmentId();
+        }
+        auto left_segment = segments[random_left_segment_id];
+        auto right_segment = segments[random_right_segment_id];
+        if (compare(left_segment->getRowKeyRange().getEnd(), right_segment->getRowKeyRange().getStart()) != 0 || left_segment->nextSegmentId() != right_segment->segmentId())
+        {
+            continue;
+        }
+        return {random_left_segment_id, random_right_segment_id};
+    }
+}
+
+RowKeyRange SegmentTestBasic::commanHandleKeyRange()
+{
+    String start_key, end_key;
+    {
+        WriteBufferFromOwnString ss;
+        ::DB::EncodeUInt(static_cast<UInt8>(TiDB::CodecFlagInt), ss);
+        ::DB::EncodeInt64(std::numeric_limits<Int64>::min(), ss);
+        start_key = ss.releaseStr();
+    }
+    {
+        WriteBufferFromOwnString ss;
+        ::DB::EncodeUInt(static_cast<UInt8>(TiDB::CodecFlagInt), ss);
+        ::DB::EncodeInt64(std::numeric_limits<Int64>::max(), ss);
+        end_key = ss.releaseStr();
+    }
+    return RowKeyRange(RowKeyValue(true, std::make_shared<String>(start_key), 0), RowKeyValue(true, std::make_shared<String>(end_key), 0), true, 1);
+}
+
+SegmentPtr SegmentTestBasic::reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns, DB::Settings && db_settings)
+{
+    TiFlashStorageTestBasic::reload(std::move(db_settings));
+    storage_path_pool = std::make_unique<StoragePathPool>(db_context->getPathPool().withTable("test", "t1", false));
+    storage_pool = std::make_unique<StoragePool>(*db_context, /*ns_id*/ 100, *storage_path_pool, "test.t1");
+    storage_pool->restore();
+    ColumnDefinesPtr cols = (!pre_define_columns) ? DMTestEnv::getDefaultColumns(is_common_handle ? DMTestEnv::PkType::CommonHandle : DMTestEnv::PkType::HiddenTiDBRowID) : pre_define_columns;
+    setColumns(cols);
+
+    return Segment::newSegment(*dm_context, table_columns, is_common_handle ? commanHandleKeyRange() : RowKeyRange::newAll(is_common_handle, 1), storage_pool->newMetaPageId(), 0);
+}
+
+void SegmentTestBasic::setColumns(const ColumnDefinesPtr & columns)
+{
+    *table_columns = *columns;
+
+    dm_context = std::make_unique<DMContext>(*db_context,
+                                             *storage_path_pool,
+                                             *storage_pool,
+                                             0,
+                                             /*min_version_*/ 0,
+                                             settings.not_compress_columns,
+                                             options.is_common_handle,
+                                             1,
+                                             db_context->getSettingsRef());
+}
+} // namespace tests
+} // namespace DM
+} // namespace DB
diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
new file mode 100644
index 00000000000..ab0c7d6d0be
--- /dev/null
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
@@ -0,0 +1,123 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+namespace DM
+{
+namespace tests
+{
+class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
+{
+public:
+    struct SegmentTestOptions
+    {
+        bool is_common_handle = false;
+    };
+
+public:
+    void reloadWithOptions(SegmentTestOptions config);
+
+    std::optional<PageId> splitSegment(PageId segment_id);
+    void mergeSegment(PageId left_segment_id, PageId right_segment_id);
+    void mergeSegmentDelta(PageId segment_id);
+    void flushSegmentCache(PageId segment_id);
+    void writeSegment(PageId segment_id, UInt64 write_rows = 100);
+    void writeSegmentWithDeletedPack(PageId segment_id);
+    void deleteRangeSegment(PageId segment_id);
+
+
+    void writeRandomSegment();
+    void writeRandomSegmentWithDeletedPack();
+    void deleteRangeRandomSegment();
+    void splitRandomSegment();
+    void mergeRandomSegment();
+    void mergeDeltaRandomSegment();
+    void flushCacheRandomSegment();
+
+    void randomSegmentTest(size_t operator_count);
+
+    PageId createNewSegmentWithSomeData();
+    size_t getSegmentRowNumWithoutMVCC(PageId segment_id);
+    size_t getSegmentRowNum(PageId segment_id);
+    void checkSegmentRow(PageId segment_id, size_t expected_row_num);
+    std::pair<Int64, Int64> getSegmentKeyRange(SegmentPtr segment);
+
+protected:
+    //
+    std::map<PageId, SegmentPtr> segments;
+
+    enum SegmentOperaterType
+    {
+        Write = 0,
+        DeleteRange,
+        Split,
+        Merge,
+        MergeDelta,
+        FlushCache,
+        WriteDeletedPack,
+        SegmentOperaterMax
+    };
+
+    const std::vector<std::function<void()>> segment_operator_entries = {
+        [this] { writeRandomSegment(); },
+        [this] { deleteRangeRandomSegment(); },
+        [this] { splitRandomSegment(); },
+        [this] { mergeRandomSegment(); },
+        [this] { mergeDeltaRandomSegment(); },
+        [this] { flushCacheRandomSegment(); },
+        [this] {
+            writeRandomSegmentWithDeletedPack();
+        }};
+
+    PageId getRandomSegmentId();
+
+    std::pair<PageId, PageId> getRandomMergeablePair();
+
+    RowKeyRange commanHandleKeyRange();
+
+    SegmentPtr reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns = {}, DB::Settings && db_settings = DB::Settings());
+
+    // setColumns should update dm_context at the same time
+    void setColumns(const ColumnDefinesPtr & columns);
+
+    const ColumnDefinesPtr & tableColumns() const { return table_columns; }
+
+    DMContext & dmContext() { return *dm_context; }
+
+protected:
+    /// all these vars live as refs in dm_context
+    std::unique_ptr<StoragePathPool> storage_path_pool;
+    std::unique_ptr<StoragePool> storage_pool;
+    /// dm_context
+    std::unique_ptr<DMContext> dm_context;
+    ColumnDefinesPtr table_columns;
+    DM::DeltaMergeStore::Settings settings;
+
+    SegmentPtr root_segment;
+    UInt64 version = 0;
+    SegmentTestOptions options;
+};
+} // namespace tests
+} // namespace DM
+} // namespace DB
\ No newline at end of file
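randomSegmentTest() (declared above) drives the harness by indexing `segment_operator_entries` with a random enum value. That dispatch pattern reduces to an enum-sized table of `std::function`s; a self-contained sketch with hypothetical names:

```cpp
#include <cstdlib>
#include <functional>
#include <iostream>
#include <vector>

// Sketch: an enum-indexed operation table, as used by randomSegmentTest().
// The enum's last member doubles as the table size and the modulus.
enum OpType { Write = 0, Split, Merge, OpMax };

int main()
{
    std::vector<std::function<void()>> ops = {
        [] { std::cout << "write\n"; },
        [] { std::cout << "split\n"; },
        [] { std::cout << "merge\n"; },
    };
    for (int i = 0; i < 10; ++i)
        ops[std::rand() % OpMax](); // pick a random operation, as the test does
}
```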
diff --git a/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.cpp b/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.cpp
index 98cb8ef34e7..390d4001432 100644
--- a/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.cpp
+++ b/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.cpp
@@ -26,7 +26,7 @@ namespace tests
 {
 IDGenerator pk{0};
-IDGenerator tso{StopWatchDetail::nanoseconds(CLOCK_MONOTONIC)};
+IDGenerator tso{clock_gettime_ns(CLOCK_MONOTONIC)};
 
 template <typename T>
 void insertColumn(Block & block, const DataTypePtr & type, const String & name, Int64 col_id, const std::vector<T> & values)
@@ -68,6 +68,7 @@ DMStressProxy::DMStressProxy(const StressOptions & opts_)
         /* num_streams= */ 1,
         /* max_version= */ tso.get(),
         EMPTY_FILTER,
+        /* tracing_id= */ "",
         /* expected_block_size= */ 1024)[0];
     while (Block block = in->read())
     {
@@ -199,6 +200,7 @@ UInt64 DMStressProxy::countRows(UInt32 rnd_break_prob)
         /* num_streams= */ 1,
         /* max_version= */ tso.get(),
         EMPTY_FILTER,
+        /* tracing_id= */ "",
         /* expected_block_size= */ 1024)[0];
 
     UInt64 total_count = 0;
@@ -428,6 +430,7 @@ void DMStressProxy::verify()
         /* num_streams= */ 1,
         /* max_version= */ tso.get(),
         EMPTY_FILTER,
+        /* tracing_id= */ "",
         /* expected_block_size= */ 1024)[0];
     UInt64 dm_total_count = 0;
     while (Block block = in->read())
diff --git a/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.h b/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.h
index 0571eafae83..6f004ed6959 100644
--- a/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.h
+++ b/dbms/src/Storages/DeltaMerge/tests/stress/DMStressProxy.h
@@ -54,7 +54,7 @@ template <typename T>
 class IDGenerator
 {
 public:
-    IDGenerator(T t_)
+    explicit IDGenerator(T t_)
         : t(t_)
     {}
     std::vector<T> get(Int32 count)
@@ -81,19 +81,21 @@ class KeyLock
 public:
     static constexpr UInt32 default_key_lock_slot_count = 4096;
-    KeyLock(UInt32 slot_count = default_key_lock_slot_count)
+    explicit KeyLock(UInt32 slot_count = default_key_lock_slot_count)
         : key_rmutexs(slot_count)
     {}
     std::vector<std::unique_lock<std::recursive_mutex>> getLocks(const std::vector<Int64> & keys)
     {
         std::vector<UInt32> idxs;
+        idxs.reserve(keys.size());
         for (Int64 key : keys)
         {
             idxs.push_back(getLockIdx(key));
         }
         sort(idxs.begin(), idxs.end()); // Sort mutex to avoid dead lock.
         std::vector<std::unique_lock<std::recursive_mutex>> locks;
+        locks.reserve(idxs.size());
         for (UInt32 i : idxs)
         {
             locks.push_back(getLockByIdx(i));
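KeyLock::getLocks sorts the slot indexes before acquiring them, so every thread takes mutexes in one global order and overlapping key sets cannot deadlock. A self-contained sketch of the idea (hypothetical types; the real KeyLock tolerates duplicate slots by using recursive mutexes, while this sketch deduplicates instead):

```cpp
#include <algorithm>
#include <mutex>
#include <vector>

// Sketch: acquire a set of mutexes in a globally consistent (sorted) order,
// so two threads locking overlapping sets can never deadlock on each other.
std::vector<std::unique_lock<std::mutex>> lockAll(std::vector<std::mutex> & slots, std::vector<size_t> idxs)
{
    std::sort(idxs.begin(), idxs.end());
    idxs.erase(std::unique(idxs.begin(), idxs.end()), idxs.end()); // avoid locking a slot twice
    std::vector<std::unique_lock<std::mutex>> locks;
    locks.reserve(idxs.size());
    for (size_t i : idxs)
        locks.emplace_back(slots[i]); // locks on construction
    return locks; // all released when the vector goes out of scope
}
```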
@@ -121,7 +123,7 @@ class KeySet
 {
 public:
     static constexpr UInt32 default_key_set_slot_count = 4096;
-    KeySet(UInt32 slot_count = default_key_set_slot_count)
+    explicit KeySet(UInt32 slot_count = default_key_set_slot_count)
         : key_set_mutexs(slot_count)
         , key_sets(slot_count)
     {}
@@ -181,7 +183,7 @@ class KeySet
 class DMStressProxy
 {
 public:
-    DMStressProxy(const StressOptions & opts_);
+    explicit DMStressProxy(const StressOptions & opts_);
 
     void run();
diff --git a/dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h b/dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h
index 816820230df..56eb47f30ee 100644
--- a/dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h
+++ b/dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h
@@ -24,7 +24,7 @@ class TimestampGenerator
 {
 public:
     TimestampGenerator()
-        : t(StopWatchDetail::nanoseconds(CLOCK_MONOTONIC))
+        : t(clock_gettime_ns(CLOCK_MONOTONIC))
     {}
 
     std::vector<UInt64> get(int count)
diff --git a/dbms/src/Storages/IManageableStorage.h b/dbms/src/Storages/IManageableStorage.h
index ebf84c592e4..2ff766a9c6d 100644
--- a/dbms/src/Storages/IManageableStorage.h
+++ b/dbms/src/Storages/IManageableStorage.h
@@ -68,7 +68,7 @@ class IManageableStorage : public IStorage
 
     virtual void flushCache(const Context & /*context*/) {}
 
-    virtual void flushCache(const Context & /*context*/, const DM::RowKeyRange & /*range_to_flush*/) {}
+    virtual bool flushCache(const Context & /*context*/, const DM::RowKeyRange & /*range_to_flush*/, [[maybe_unused]] bool try_until_succeed = true) { return true; }
 
     virtual BlockInputStreamPtr status() { return {}; }
diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp
index 5eb275f5af5..951da42de1c 100644
--- a/dbms/src/Storages/Page/V3/PageDirectory.cpp
+++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp
@@ -478,7 +478,7 @@ PageSize VersionedPageEntries::getEntriesByBlobIds(
 bool VersionedPageEntries::cleanOutdatedEntries(
     UInt64 lowest_seq,
     std::map<PageIdV3Internal, std::pair<PageVersion, Int64>> * normal_entries_to_deref,
-    PageEntriesV3 & entries_removed,
+    PageEntriesV3 * entries_removed,
     const PageLock & /*page_lock*/)
 {
     if (type == EditRecordType::VAR_EXTERNAL)
@@ -541,7 +541,10 @@ bool VersionedPageEntries::cleanOutdatedEntries(
         {
             if (iter->second.being_ref_count == 1)
             {
-                entries_removed.emplace_back(iter->second.entry);
+                if (entries_removed)
+                {
+                    entries_removed->emplace_back(iter->second.entry);
+                }
                 iter = entries.erase(iter);
             }
             // The `being_ref_count` for this version is valid. While for older versions,
@@ -551,7 +554,10 @@ bool VersionedPageEntries::cleanOutdatedEntries(
         else
         {
             // else there are newer "entry" in the version list, the outdated entries should be removed
-            entries_removed.emplace_back(iter->second.entry);
+            if (entries_removed)
+            {
+                entries_removed->emplace_back(iter->second.entry);
+            }
             iter = entries.erase(iter);
         }
     }
@@ -564,7 +570,7 @@ bool VersionedPageEntries::cleanOutdatedEntries(
     return entries.empty() || (entries.size() == 1 && entries.begin()->second.isDelete());
 }
 
-bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 & entries_removed)
+bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 * entries_removed)
 {
     auto page_lock = acquireLock();
     if (type == EditRecordType::VAR_EXTERNAL)
@@ -1239,7 +1245,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W
     return done_any_io;
 }
 
-PageEntriesV3 PageDirectory::gcInMemEntries()
+PageEntriesV3 PageDirectory::gcInMemEntries(bool return_removed_entries)
 {
     UInt64 lowest_seq = sequence.load();
@@ -1303,7 +1309,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries()
         const bool all_deleted = iter->second->cleanOutdatedEntries(
             lowest_seq,
             &normal_entries_to_deref,
-            all_del_entries,
+            return_removed_entries ? &all_del_entries : nullptr,
             iter->second->acquireLock());
 
         {
@@ -1342,7 +1348,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries()
             page_id,
             /*deref_ver=*/deref_counter.first,
             /*deref_count=*/deref_counter.second,
-            all_del_entries);
+            return_removed_entries ? &all_del_entries : nullptr);
 
         if (all_deleted)
         {
diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h
index bd7c433022f..2f0f09f4e42 100644
--- a/dbms/src/Storages/Page/V3/PageDirectory.h
+++ b/dbms/src/Storages/Page/V3/PageDirectory.h
@@ -223,14 +223,14 @@ class VersionedPageEntries
     bool cleanOutdatedEntries(
         UInt64 lowest_seq,
         std::map<PageIdV3Internal, std::pair<PageVersion, Int64>> * normal_entries_to_deref,
-        PageEntriesV3 & entries_removed,
+        PageEntriesV3 * entries_removed,
         const PageLock & page_lock);
     bool derefAndClean(
         UInt64 lowest_seq,
         PageIdV3Internal page_id,
         const PageVersion & deref_ver,
         Int64 deref_count,
-        PageEntriesV3 & entries_removed);
+        PageEntriesV3 * entries_removed);
 
     void collapseTo(UInt64 seq, PageIdV3Internal page_id, PageEntriesEdit & edit);
@@ -360,7 +360,9 @@ class PageDirectory
 
     bool tryDumpSnapshot(const ReadLimiterPtr & read_limiter = nullptr, const WriteLimiterPtr & write_limiter = nullptr);
 
-    PageEntriesV3 gcInMemEntries();
+    // Perform a GC for in-memory entries and return the removed entries.
+    // If `return_removed_entries` is false, then just return an empty set.
+    PageEntriesV3 gcInMemEntries(bool return_removed_entries = true);
 
     std::set<PageId> getAliveExternalIds(NamespaceId ns_id) const;
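The `&` to `*` change above makes the out-parameter optional: a caller that does not need the removed entries passes nullptr and the collection cost disappears. The shape of the pattern as a generic sketch (hypothetical types, not the patch's code):

```cpp
#include <vector>

struct Entry { int id = 0; };

// Sketch: an optional out-parameter via a nullable pointer. Passing nullptr
// means "don't collect", so the hot path skips building a vector nobody reads.
bool cleanEntries(std::vector<Entry> & live, std::vector<Entry> * removed /* nullable */)
{
    while (!live.empty())
    {
        if (removed)
            removed->push_back(live.back()); // only collected on request
        live.pop_back();
    }
    return live.empty();
}
```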
diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp
index 483c5073ab5..968049a3273 100644
--- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp
+++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp
@@ -44,7 +44,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WAL
     // After restoring from the disk, we need cleanup all invalid entries in memory, or it will
     // try to run GC again on some entries that are already marked as invalid in BlobStore.
-    dir->gcInMemEntries();
+    // There is no need to remove the expired entries in BlobStore, so skip filling removed_entries to improve performance.
+    dir->gcInMemEntries(/*return_removed_entries=*/false);
     LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence);
 
     if (blob_stats)
@@ -84,7 +85,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromEdit(String storage_name, FileP
     // After restoring from the disk, we need cleanup all invalid entries in memory, or it will
     // try to run GC again on some entries that are already marked as invalid in BlobStore.
-    dir->gcInMemEntries();
+    // There is no need to remove the expired entries in BlobStore during restore, so no need to fill removed_entries.
+    dir->gcInMemEntries(/*return_removed_entries=*/false);
 
     if (blob_stats)
     {
diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp
index 83e07f75d37..6d6ef41630f 100644
--- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp
+++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp
@@ -644,14 +644,14 @@ class VersionedEntriesTest : public ::testing::Test
     {
         DerefCounter deref_counter;
         PageEntriesV3 removed_entries;
-        bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, removed_entries, entries.acquireLock());
+        bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, &removed_entries, entries.acquireLock());
         return {all_removed, removed_entries, deref_counter};
     }
 
     std::tuple<bool, PageEntriesV3> runDeref(UInt64 seq, PageVersion ver, Int64 decrease_num)
     {
         PageEntriesV3 removed_entries;
-        bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, removed_entries);
+        bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, &removed_entries);
         return {all_removed, removed_entries};
     }
diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp
index 67d32c73a05..a6de4efb3ac 100644
--- a/dbms/src/Storages/StorageDeltaMerge.cpp
+++ b/dbms/src/Storages/StorageDeltaMerge.cpp
@@ -775,12 +775,12 @@ void StorageDeltaMerge::checkStatus(const Context & context)
 
 void StorageDeltaMerge::flushCache(const Context & context)
 {
-    flushCache(context, DM::RowKeyRange::newAll(is_common_handle, rowkey_column_size));
+    flushCache(context, DM::RowKeyRange::newAll(is_common_handle, rowkey_column_size), /* try_until_succeed */ true);
 }
 
-void StorageDeltaMerge::flushCache(const Context & context, const DM::RowKeyRange & range_to_flush)
+bool StorageDeltaMerge::flushCache(const Context & context, const DM::RowKeyRange & range_to_flush, bool try_until_succeed)
 {
-    getAndMaybeInitStore()->flushCache(context, range_to_flush);
+    return getAndMaybeInitStore()->flushCache(context, range_to_flush, try_until_succeed);
 }
 
 void StorageDeltaMerge::mergeDelta(const Context & context)
diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h
index 79ee225d237..9e4ab12ad4f 100644
--- a/dbms/src/Storages/StorageDeltaMerge.h
+++ b/dbms/src/Storages/StorageDeltaMerge.h
@@ -73,7 +73,7 @@ class StorageDeltaMerge
 
     void flushCache(const Context & context) override;
 
-    void flushCache(const Context & context, const DM::RowKeyRange & range_to_flush) override;
+    bool flushCache(const Context & context, const DM::RowKeyRange & range_to_flush, bool try_until_succeed) override;
 
     /// Merge delta into the stable layer for all segments.
     ///
diff --git a/dbms/src/Storages/Transaction/Collator.cpp b/dbms/src/Storages/Transaction/Collator.cpp
index a9b4d0784be..1b0221a6829 100644
--- a/dbms/src/Storages/Transaction/Collator.cpp
+++ b/dbms/src/Storages/Transaction/Collator.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include
+#include
 #include
 #include
 
@@ -29,17 +30,10 @@ TiDBCollators dummy_collators;
 std::vector<std::string> dummy_sort_key_contaners;
 std::string dummy_sort_key_contaner;
 
-std::string_view rtrim(const char * s, size_t length)
+ALWAYS_INLINE std::string_view rtrim(const char * s, size_t length)
 {
     auto v = std::string_view(s, length);
-    size_t end = v.find_last_not_of(' ');
-    return end == std::string_view::npos ? "" : v.substr(0, end + 1);
-}
-
-template <typename T>
-int signum(T val)
-{
-    return (0 < val) - (val < 0);
+    return DB::RightTrim(v);
 }
 
 using Rune = int32_t;
@@ -183,26 +177,26 @@ class Pattern : public ITiDBCollator::IPattern
 };
 
 template <bool padding>
-class BinCollator : public ITiDBCollator
+class BinCollator final : public ITiDBCollator
 {
 public:
     explicit BinCollator(int32_t id)
         : ITiDBCollator(id)
     {}
+
     int compare(const char * s1, size_t length1, const char * s2, size_t length2) const override
     {
         if constexpr (padding)
-            return signum(rtrim(s1, length1).compare(rtrim(s2, length2)));
+            return DB::RtrimStrCompare({s1, length1}, {s2, length2});
         else
-            return signum(std::string_view(s1, length1).compare(std::string_view(s2, length2)));
+            return DB::RawStrCompare({s1, length1}, {s2, length2});
     }
 
     StringRef sortKey(const char * s, size_t length, std::string &) const override
     {
         if constexpr (padding)
         {
-            auto v = rtrim(s, length);
-            return StringRef(v.data(), v.length());
+            return StringRef(rtrim(s, length));
         }
         else
         {
@@ -249,7 +243,7 @@ using WeightType = uint16_t;
 extern const std::array weight_lut;
 } // namespace GeneralCI
 
-class GeneralCICollator : public ITiDBCollator
+class GeneralCICollator final : public ITiDBCollator
 {
 public:
     explicit GeneralCICollator(int32_t id)
@@ -270,7 +264,7 @@ class GeneralCICollator : public ITiDBCollator
             auto sk2 = weight(c2);
             auto cmp = sk1 - sk2;
             if (cmp != 0)
-                return signum(cmp);
+                return DB::signum(cmp);
         }
 
         return (offset1 < v1.length()) - (offset2 < v2.length());
@@ -365,7 +359,7 @@ const std::array weight_lut_long = {
 } // namespace UnicodeCI
 
-class UnicodeCICollator : public ITiDBCollator
+class UnicodeCICollator final : public ITiDBCollator
 {
 public:
     explicit UnicodeCICollator(int32_t id)
@@ -420,7 +414,7 @@ class UnicodeCICollator : public ITiDBCollator
             }
             else
             {
-                return signum(static_cast<Int64>(s1_first & 0xFFFF) - static_cast<Int64>(s2_first & 0xFFFF));
+                return DB::signum(static_cast<Int64>(s1_first & 0xFFFF) - static_cast<Int64>(s2_first & 0xFFFF));
             }
         }
     }
@@ -593,6 +587,8 @@ class UnicodeCICollator : public ITiDBCollator
     friend class Pattern;
 };
 
+using UTF8MB4_BIN_TYPE = BinCollator<true>;
+
 TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id)
 {
     switch (id)
@@ -607,10 +603,10 @@ TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id)
         static const auto latin1_collator = BinCollator<true>(LATIN1_BIN);
         return &latin1_collator;
     case ITiDBCollator::UTF8MB4_BIN:
-        static const auto utf8mb4_collator = BinCollator<true>(UTF8MB4_BIN);
+        static const auto utf8mb4_collator = UTF8MB4_BIN_TYPE(UTF8MB4_BIN);
         return &utf8mb4_collator;
     case ITiDBCollator::UTF8_BIN:
-        static const auto utf8_collator = BinCollator<true>(UTF8_BIN);
+        static const auto utf8_collator = UTF8MB4_BIN_TYPE(UTF8_BIN);
         return &utf8_collator;
    case ITiDBCollator::UTF8_GENERAL_CI:
         static const auto utf8_general_ci_collator = GeneralCICollator(UTF8_GENERAL_CI);
diff --git a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h
index e8e0610326c..b0cacefe6f4 100644
--- a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h
+++ b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h
@@ -77,10 +77,12 @@ struct DecodingStorageSchemaSnapshot
         , decoding_schema_version{decoding_schema_version_}
     {
         std::unordered_map<ColumnID, size_t> column_lut;
+        std::unordered_map<String, ColumnID> column_name_id_map;
         for (size_t i = 0; i < table_info_.columns.size(); i++)
         {
             const auto & ci = table_info_.columns[i];
             column_lut.emplace(ci.id, i);
+            column_name_id_map.emplace(ci.name, ci.id);
         }
         for (size_t i = 0; i < column_defines->size(); i++)
         {
@@ -88,7 +90,7 @@ struct DecodingStorageSchemaSnapshot
             sorted_column_id_with_pos.insert({cd.id, i});
             if (cd.id != TiDBPkColumnID && cd.id != VersionColumnID && cd.id != DelMarkColumnID)
             {
-                auto & columns = table_info_.columns;
+                const auto & columns = table_info_.columns;
                 column_infos.push_back(columns[column_lut.at(cd.id)]);
             }
             else
@@ -100,10 +102,14 @@ struct DecodingStorageSchemaSnapshot
         // create pk related metadata if needed
         if (is_common_handle)
         {
-            const auto & primary_index_info = table_info_.getPrimaryIndexInfo();
-            for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++)
+            /// We do not update the IndexInfo except for the Rename DDL.
+            /// When an add column / drop column action happens, the offset of each column may change.
+            /// Thus, we should not use the offset to locate the column we want,
+            /// but compare the column names to get the column id.
+            const auto & primary_index_cols = table_info_.getPrimaryIndexInfo().idx_cols;
+            for (const auto & col : primary_index_cols)
             {
-                auto pk_column_id = table_info_.columns[primary_index_info.idx_cols[i].offset].id;
+                auto pk_column_id = column_name_id_map[col.name];
                 pk_column_ids.emplace_back(pk_column_id);
                 pk_pos_map.emplace(pk_column_id, reinterpret_cast(std::numeric_limits::max()));
             }
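The schema-snapshot fix swaps offset-based lookup, which goes stale after ADD COLUMN / DROP COLUMN, for a name-based lookup built in one pass. The pattern in isolation (hypothetical structs, not the patch's code):

```cpp
#include <string>
#include <unordered_map>
#include <vector>

struct Col { std::string name; long id; };

// Sketch: resolve index columns by name instead of by stored offset, which
// can be stale after ADD COLUMN / DROP COLUMN changed the column order.
long pkColumnId(const std::vector<Col> & cols, const std::string & idx_col_name)
{
    std::unordered_map<std::string, long> name_to_id;
    for (const auto & c : cols)
        name_to_id.emplace(c.name, c.id);
    return name_to_id.at(idx_col_name); // throws if the index references a dropped column
}
```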
diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp
index 318a04c6ed9..fb31e4476bb 100644
--- a/dbms/src/Storages/Transaction/KVStore.cpp
+++ b/dbms/src/Storages/Transaction/KVStore.cpp
@@ -129,7 +129,7 @@ void KVStore::traverseRegions(std::function & callback
         callback(region.first, region.second);
 }
 
-void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log)
+bool KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log, bool try_until_succeed)
 {
     auto table_id = region.getMappedTableID();
     auto storage = tmt.getStorages().get(table_id);
@@ -139,7 +139,7 @@ void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & regi
             "tryFlushRegionCacheInStorage can not get table for region {} with table id {}, ignored",
             region.toString(),
             table_id);
-        return;
+        return true;
     }
 
     try
@@ -151,7 +151,7 @@ void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & regi
             region.getRange()->getMappedTableID(),
             storage->isCommonHandle(),
             storage->getRowKeyColumnSize());
-        storage->flushCache(tmt.getContext(), rowkey_range);
+        return storage->flushCache(tmt.getContext(), rowkey_range, try_until_succeed);
     }
     catch (DB::Exception & e)
     {
@@ -159,6 +159,7 @@ void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & regi
         if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
             throw;
     }
+    return true;
 }
 
 void KVStore::tryPersist(RegionID region_id)
@@ -326,6 +327,64 @@ void KVStore::persistRegion(const Region & region, const RegionTaskLock & region
     LOG_FMT_DEBUG(log, "Persist {} done", region.toString(false));
 }
 
+bool KVStore::needFlushRegionData(UInt64 region_id, TMTContext & tmt)
+{
+    auto region_task_lock = region_manager.genRegionTaskLock(region_id);
+    const RegionPtr curr_region_ptr = getRegion(region_id);
+    return canFlushRegionDataImpl(curr_region_ptr, false, false, tmt, region_task_lock);
+}
+
+bool KVStore::tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt)
+{
+    auto region_task_lock = region_manager.genRegionTaskLock(region_id);
+    const RegionPtr curr_region_ptr = getRegion(region_id);
+    return canFlushRegionDataImpl(curr_region_ptr, true, try_until_succeed, tmt, region_task_lock);
+}
+
+bool KVStore::canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock)
+{
+    if (curr_region_ptr == nullptr)
+    {
+        throw Exception("region not found when trying to flush", ErrorCodes::LOGICAL_ERROR);
+    }
+    auto & curr_region = *curr_region_ptr;
+
+    auto [rows, size_bytes] = curr_region.getApproxMemCacheInfo();
+
+    LOG_FMT_DEBUG(log, "{} approx mem cache info: rows {}, bytes {}", curr_region.toString(false), rows, size_bytes);
+
+    bool can_flush = false;
+    if (rows >= region_compact_log_min_rows.load(std::memory_order_relaxed)
+        || size_bytes >= region_compact_log_min_bytes.load(std::memory_order_relaxed))
+    {
+        // if rows or bytes are more than the threshold, flush cache and persist mem data.
+        can_flush = true;
+    }
+    else
+    {
+        // if there is little data in mem, wait until the time interval reaches the threshold.
+        // use a random period so that lots of regions will not be persisted at the same time.
+        auto compact_log_period = std::rand() % region_compact_log_period.load(std::memory_order_relaxed); // NOLINT
+        can_flush = !(curr_region.lastCompactLogTime() + Seconds{compact_log_period} > Clock::now());
+    }
+    if (can_flush && flush_if_possible)
+    {
+        LOG_FMT_DEBUG(log, "{} flush region due to can_flush_data", curr_region.toString(false));
+        if (tryFlushRegionCacheInStorage(tmt, curr_region, log, try_until_succeed))
+        {
+            persistRegion(curr_region, region_task_lock, "compact raft log");
+            curr_region.markCompactLog();
+            curr_region.cleanApproxMemCacheInfo();
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+    return can_flush;
+}
+
 EngineStoreApplyRes KVStore::handleUselessAdminRaftCmd(
     raft_cmdpb::AdminCmdType cmd_type,
     UInt64 curr_region_id,
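For small regions, canFlushRegionDataImpl() above defers flushing until a randomized time interval has passed, so many idle regions do not all persist in lockstep. The jitter check on its own, as a sketch with hypothetical names (it assumes `max_period_secs > 0`):

```cpp
#include <chrono>
#include <cstdlib>

using Clock = std::chrono::steady_clock;

// Sketch: randomized flush period. Each check draws a fresh jittered deadline
// in [0, max_period_secs), spreading the flushes of idle regions over time.
bool shouldFlushBySchedule(Clock::time_point last_flush, int max_period_secs)
{
    int jittered = std::rand() % max_period_secs; // NOLINT: test-style randomness
    return last_flush + std::chrono::seconds(jittered) <= Clock::now();
}
```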
@@ -359,32 +418,12 @@ EngineStoreApplyRes KVStore::handleUselessAdminRaftCmd(
         }
         else
         {
-            auto [rows, size_bytes] = curr_region.getApproxMemCacheInfo();
-
-            LOG_FMT_DEBUG(log, "{} approx mem cache info: rows {}, bytes {}", curr_region.toString(false), rows, size_bytes);
-
-            if (rows >= region_compact_log_min_rows.load(std::memory_order_relaxed)
-                || size_bytes >= region_compact_log_min_bytes.load(std::memory_order_relaxed))
-            {
-                // if rows or bytes more than threshold, flush cache and perist mem data.
-                return true;
-            }
-            else
-            {
-                // if thhere is little data in mem, wait until time interval reached threshold.
-                // use random period so that lots of regions will not be persisted at same time.
-                auto compact_log_period = std::rand() % region_compact_log_period.load(std::memory_order_relaxed); // NOLINT
-                return !(curr_region.lastCompactLogTime() + Seconds{compact_log_period} > Clock::now());
-            }
+            return canFlushRegionDataImpl(curr_region_ptr, true, /* try_until_succeed */ false, tmt, region_task_lock);
         }
     };
 
     if (check_sync_log())
     {
-        tryFlushRegionCacheInStorage(tmt, curr_region, log);
-        persistRegion(curr_region, region_task_lock, "compact raft log");
-        curr_region.markCompactLog();
-        curr_region.cleanApproxMemCacheInfo();
         return EngineStoreApplyRes::Persist;
     }
     return EngineStoreApplyRes::None;
diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h
index bb45e65d18b..b58083557a1 100644
--- a/dbms/src/Storages/Transaction/KVStore.h
+++ b/dbms/src/Storages/Transaction/KVStore.h
@@ -91,7 +91,7 @@ class KVStore final : private boost::noncopyable
 
     void tryPersist(RegionID region_id);
 
-    static void tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log);
+    static bool tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log, bool try_until_succeed = true);
 
     size_t regionSize() const;
     EngineStoreApplyRes handleAdminRaftCmd(raft_cmdpb::AdminRequest && request,
@@ -108,6 +108,9 @@ class KVStore final : private boost::noncopyable
                                            TMTContext & tmt);
     EngineStoreApplyRes handleWriteRaftCmd(const WriteCmdsView & cmds, UInt64 region_id, UInt64 index, UInt64 term, TMTContext & tmt);
 
+    bool needFlushRegionData(UInt64 region_id, TMTContext & tmt);
+    bool tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt);
+
     void handleApplySnapshot(metapb::Region && region, uint64_t peer_id, const SSTViewVec, uint64_t index, uint64_t term, TMTContext & tmt);
 
     std::vector /* */ preHandleSnapshotToFiles(
@@ -219,6 +222,11 @@ class KVStore final : private boost::noncopyable
         UInt64 term,
         TMTContext & tmt);
 
+    /// Notice that if flush_if_possible is set to false, we only check whether a flush is allowed by the row/size/interval thresholds;
+    /// it will not check whether a flush would eventually succeed.
+    /// In other words, `canFlushRegionDataImpl(flush_if_possible=true)` can return false.
+    bool canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock);
+
     void persistRegion(const Region & region, const RegionTaskLock & region_task_lock, const char * caller);
     void releaseReadIndexWorkers();
     void handleDestroy(UInt64 region_id, TMTContext & tmt, const KVStoreTaskLock &);
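The two new public entry points split a read-only query (needFlushRegionData) from the side-effecting attempt (tryFlushRegionData). A hedged sketch of how a caller could drive them, using the signatures declared above (the helper name is hypothetical; the real callers are the proxy FFI functions further below):

```cpp
// Sketch: check-then-flush. tryFlushRegionData(try_until_succeed=false) may
// return false, meaning "a flush is due but could not complete yet; retry later".
bool maybeCompactRaftLog(KVStore & kvstore, UInt64 region_id, TMTContext & tmt)
{
    if (!kvstore.needFlushRegionData(region_id, tmt))
        return false; // nothing worth flushing yet
    return kvstore.tryFlushRegionData(region_id, /*try_until_succeed=*/false, tmt);
}
```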
diff --git a/dbms/src/Storages/Transaction/PartitionStreams.cpp b/dbms/src/Storages/Transaction/PartitionStreams.cpp
index 456f067fe5e..cf151c4270d 100644
--- a/dbms/src/Storages/Transaction/PartitionStreams.cpp
+++ b/dbms/src/Storages/Transaction/PartitionStreams.cpp
@@ -40,6 +40,8 @@ namespace FailPoints
 extern const char pause_before_apply_raft_cmd[];
 extern const char pause_before_apply_raft_snapshot[];
 extern const char force_set_safepoint_when_decode_block[];
+extern const char unblock_query_init_after_write[];
+extern const char pause_query_init[];
 } // namespace FailPoints
 
 namespace ErrorCodes
@@ -151,6 +153,7 @@ static void writeRegionDataToStorage(
     default:
         throw Exception("Unknown StorageEngine: " + toString(static_cast(storage->engineType())), ErrorCodes::LOGICAL_ERROR);
     }
+    write_part_cost = watch.elapsedMilliseconds();
     GET_METRIC(tiflash_raft_write_data_to_storage_duration_seconds, type_write).Observe(write_part_cost / 1000.0);
     if (need_decode)
@@ -165,10 +168,20 @@ static void writeRegionDataToStorage(
     /// decoding data. Check the test case for more details.
     FAIL_POINT_PAUSE(FailPoints::pause_before_apply_raft_cmd);
 
+    /// Disable pause_query_init when the write action finishes, so that the query action can continue.
+    /// For the usage of unblock_query_init_after_write and pause_query_init, refer to InterpreterSelectQuery::init.
+    SCOPE_EXIT({
+        fiu_do_on(FailPoints::unblock_query_init_after_write, {
+            FailPointHelper::disableFailPoint(FailPoints::pause_query_init);
+        });
+    });
+
     /// Try read then write once.
     {
         if (atomic_read_write(false))
+        {
             return;
+        }
     }
 
     /// If first try failed, sync schema and force read then write.
@@ -177,10 +190,12 @@ static void writeRegionDataToStorage(
         tmt.getSchemaSyncer()->syncSchemas(context);
 
         if (!atomic_read_write(true))
+        {
             // Failure won't be tolerated this time.
             // TODO: Enrich exception message.
             throw Exception("Write region " + std::to_string(region->id()) + " to table " + std::to_string(table_id) + " failed", ErrorCodes::LOGICAL_ERROR);
+        }
     }
 }
diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp
index 8a40ca9b15e..d4ba50d5714 100644
--- a/dbms/src/Storages/Transaction/ProxyFFI.cpp
+++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp
@@ -128,6 +128,34 @@ EngineStoreApplyRes HandleAdminRaftCmd(
     }
 }
 
+uint8_t NeedFlushData(EngineStoreServerWrap * server, uint64_t region_id)
+{
+    try
+    {
+        auto & kvstore = server->tmt->getKVStore();
+        return kvstore->needFlushRegionData(region_id, *server->tmt);
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+        exit(-1);
+    }
+}
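Both FFI entry points wrap their body in a catch-all that logs and aborts: C++ exceptions must never unwind across the C boundary into the Rust proxy. The shape of that barrier as a standalone sketch (hypothetical function, not the patch's code):

```cpp
#include <cstdint>
#include <cstdlib>

// Sketch: an FFI-safe wrapper. Exceptions cannot cross an extern "C"
// boundary, so the only escape path is log-and-exit rather than unwinding.
extern "C" uint8_t ffi_do_work() noexcept
{
    try
    {
        return 1; // real work would go here
    }
    catch (...)
    {
        // in TiFlash this is tryLogCurrentException(__PRETTY_FUNCTION__);
        std::exit(-1);
    }
}
```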
+
+uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed)
+{
+    try
+    {
+        auto & kvstore = server->tmt->getKVStore();
+        return kvstore->tryFlushRegionData(region_id, until_succeed, *server->tmt);
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+        exit(-1);
+    }
+}
+
 static_assert(sizeof(RaftStoreProxyFFIHelper) == sizeof(TiFlashRaftProxyHelper));
 static_assert(alignof(RaftStoreProxyFFIHelper) == alignof(TiFlashRaftProxyHelper));
diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h
index e1c01599275..aafe4b375eb 100644
--- a/dbms/src/Storages/Transaction/ProxyFFI.h
+++ b/dbms/src/Storages/Transaction/ProxyFFI.h
@@ -125,6 +125,8 @@ EngineStoreApplyRes HandleAdminRaftCmd(
 EngineStoreApplyRes HandleWriteRaftCmd(const EngineStoreServerWrap * server, WriteCmdsView cmds, RaftCmdHeader header);
+uint8_t NeedFlushData(EngineStoreServerWrap * server, uint64_t region_id);
+uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed);
 void AtomicUpdateProxy(EngineStoreServerWrap * server, RaftStoreProxyFFIHelper * proxy);
 void HandleDestroy(EngineStoreServerWrap * server, uint64_t region_id);
 EngineStoreApplyRes HandleIngestSST(EngineStoreServerWrap * server, SSTViewVec snaps, RaftCmdHeader header);
@@ -158,6 +160,8 @@ inline EngineStoreServerHelper GetEngineStoreServerHelper(
         .fn_gen_cpp_string = GenCppRawString,
         .fn_handle_write_raft_cmd = HandleWriteRaftCmd,
         .fn_handle_admin_raft_cmd = HandleAdminRaftCmd,
+        .fn_need_flush_data = NeedFlushData,
+        .fn_try_flush_data = TryFlushData,
         .fn_atomic_update_proxy = AtomicUpdateProxy,
         .fn_handle_destroy = HandleDestroy,
         .fn_handle_ingest_sst = HandleIngestSST,
diff --git a/dbms/src/Storages/Transaction/ReadIndexWorker.cpp b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp
index 3223c815989..7de79dd5c6d 100644
--- a/dbms/src/Storages/Transaction/ReadIndexWorker.cpp
+++ b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp
@@ -880,7 +880,7 @@ BatchReadIndexRes ReadIndexWorkerManager::batchReadIndex(
         }
     }
     { // if meet timeout, which means part of regions can not get response from leader, try to poll rest tasks
-        TEST_LOG_FMT("rest {}, poll rest tasks onece", tasks.size());
+        TEST_LOG_FMT("rest {}, poll rest tasks once", tasks.size());
 
         while (!tasks.empty())
         {
diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp
index a9384e4a14d..2ec690c467b 100644
--- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp
+++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp
@@ -208,6 +208,8 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d
         }
         index++;
     }
+    block.checkNumberOfRows();
+
     return true;
 }
diff --git a/dbms/src/Storages/Transaction/RegionTable.cpp b/dbms/src/Storages/Transaction/RegionTable.cpp
index c855d5b3226..5ae36a4bd64 100644
--- a/dbms/src/Storages/Transaction/RegionTable.cpp
+++ b/dbms/src/Storages/Transaction/RegionTable.cpp
@@ -230,7 +230,7 @@ void removeObsoleteDataInStorage(
         auto rowkey_range = DM::RowKeyRange::fromRegionRange(handle_range, table_id, table_id, storage->isCommonHandle(), storage->getRowKeyColumnSize());
         dm_storage->deleteRange(rowkey_range, context->getSettingsRef());
-        dm_storage->flushCache(*context, rowkey_range); // flush to disk
+        dm_storage->flushCache(*context, rowkey_range, /*try_until_succeed*/ true); // flush to disk
     }
     catch (DB::Exception & e)
     {
diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp
index 15bf2a3fb58..6d07c47f235 100644
--- a/dbms/src/Storages/Transaction/TiDB.cpp
+++ b/dbms/src/Storages/Transaction/TiDB.cpp
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 
 #include
@@ -631,8 +632,8 @@ catch (const Poco::Exception & e)
 ///////////////////////
 
 IndexColumnInfo::IndexColumnInfo(Poco::JSON::Object::Ptr json)
-    : offset(0)
-    , length(0)
+    : length(0)
+    , offset(0)
 {
     deserialize(json);
 }
@@ -772,6 +773,37 @@ catch (const Poco::Exception & e)
         DB::Exception(e));
 }
 
+String TiFlashModeToString(TiFlashMode tiflash_mode)
+{
+    switch (tiflash_mode)
+    {
+    case TiFlashMode::Normal:
+        return "";
+    case TiFlashMode::Fast:
+        return "fast";
+    default:
+        LOG_FMT_WARNING(&Poco::Logger::get("TiDB"), "TiFlashModeToString with invalid tiflash mode {}", tiflash_mode);
+        return "";
+    }
+}
+
+TiFlashMode parseTiFlashMode(String mode_str)
+{
+    if (mode_str.empty())
+    {
+        return TiFlashMode::Normal;
+    }
+    else if (mode_str == "fast")
+    {
+        return TiFlashMode::Fast;
+    }
+    else
+    {
+        throw DB::Exception(
+            std::string(__PRETTY_FUNCTION__)
+            + " ParseTiFlashMode failed. mode " + mode_str + " is invalid, please set the mode to fast/normal");
+    }
+}
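TiFlashModeToString and parseTiFlashMode are meant to be inverses, with the empty string reserved for Normal so that existing table metadata without the field keeps parsing. A quick hedged check of that round-trip (a hypothetical test helper built on the functions added above):

```cpp
#include <cassert>

// Sketch: serialization round-trip for TiFlashMode. "" <-> Normal keeps old
// metadata (no "tiflash_mode" key) working; "fast" <-> Fast.
void checkTiFlashModeRoundTrip()
{
    assert(TiDB::parseTiFlashMode(TiDB::TiFlashModeToString(TiDB::TiFlashMode::Normal)) == TiDB::TiFlashMode::Normal);
    assert(TiDB::parseTiFlashMode(TiDB::TiFlashModeToString(TiDB::TiFlashMode::Fast)) == TiDB::TiFlashMode::Fast);
    assert(TiDB::parseTiFlashMode("") == TiDB::TiFlashMode::Normal); // missing field defaults to Normal
}
```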
+
 ///////////////////////
 ////// TableInfo //////
 ///////////////////////
@@ -840,6 +872,8 @@ try
 
     json->set("tiflash_replica", replica_info.getJSONObject());
 
+    json->set("tiflash_mode", std::string(TiFlashModeToString(tiflash_mode)));
+
     json->stringify(buf);
 
     return buf.str();
@@ -926,6 +960,14 @@ try
             replica_info.deserialize(replica_obj);
         }
     }
+    if (obj->has("tiflash_mode"))
+    {
+        auto mode = obj->getValue<String>("tiflash_mode");
+        if (!mode.empty())
+        {
+            tiflash_mode = parseTiFlashMode(mode);
+        }
+    }
     if (is_common_handle && index_infos.size() != 1)
     {
         throw DB::Exception(
diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h
index f67bfb332c7..a9d46b60c13 100644
--- a/dbms/src/Storages/Transaction/TiDB.h
+++ b/dbms/src/Storages/Transaction/TiDB.h
@@ -179,7 +179,6 @@ struct ColumnInfo
 
     ColumnID id = -1;
     String name;
-    Int32 offset = -1;
     Poco::Dynamic::Var origin_default_value;
     Poco::Dynamic::Var default_value;
     Poco::Dynamic::Var default_bit_value;
@@ -212,6 +211,12 @@ struct ColumnInfo
     static Int64 getTimeValue(const String &);
     static Int64 getYearValue(const String &);
     static UInt64 getBitValue(const String &);
+
+private:
+    /// Please be very careful when you have to use offset:
+    /// we never update the offset when a DDL action changes the columns,
+    /// so the offset may not correspond to the actual order of columns.
+    Int32 offset = -1;
 };
 
 enum PartitionType
@@ -298,8 +303,13 @@ struct IndexColumnInfo
     void deserialize(Poco::JSON::Object::Ptr json);
 
     String name;
-    Int32 offset;
     Int32 length;
+
+private:
+    /// Please be very careful when you have to use offset:
+    /// we never update the offset when a DDL action changes the columns,
+    /// so the offset may not correspond to the actual order of columns.
+    Int32 offset;
 };
 struct IndexInfo
 {
@@ -323,6 +333,12 @@ struct IndexInfo
     bool is_global;
 };
 
+enum class TiFlashMode
+{
+    Normal,
+    Fast,
+};
+
 struct TableInfo
 {
     TableInfo() = default;
@@ -372,6 +388,8 @@ struct TableInfo
     // The TiFlash replica info persisted by TiDB
     TiFlashReplicaInfo replica_info;
 
+    TiFlashMode tiflash_mode = TiFlashMode::Normal;
+
     ::TiDB::StorageEngine engine_type = ::TiDB::StorageEngine::UNSPECIFIED;
 
     ColumnID getColumnID(const String & name) const;
@@ -385,7 +403,12 @@ struct TableInfo
 
     bool isLogicalPartitionTable() const { return is_partition_table && belonging_table_id == DB::InvalidTableID && partition.enable; }
 
-    /// should not be called if is_common_handle = false
+    /// should not be called if is_common_handle = false.
+    /// When using IndexInfo, please avoid using the offset info:
+    /// the offset value may be wrong in some cases,
+    /// because we do not update IndexInfo except for the RENAME DDL action,
+    /// while DDLs like add column / drop column may change the offsets of columns.
+    /// Thus, please be very careful when you really have to use the offset information!
     const IndexInfo & getPrimaryIndexInfo() const { return index_infos[0]; }
 
     IndexInfo & getPrimaryIndexInfo() { return index_infos[0]; }
@@ -398,4 +421,7 @@ String genJsonNull();
 tipb::FieldType columnInfoToFieldType(const ColumnInfo & ci);
 ColumnInfo fieldTypeToColumnInfo(const tipb::FieldType & field_type);
 
+String TiFlashModeToString(TiFlashMode tiflash_mode);
+TiFlashMode parseTiFlashMode(String mode_str);
+
 } // namespace TiDB
diff --git a/dbms/src/Storages/Transaction/TiKVRecordFormat.h b/dbms/src/Storages/Transaction/TiKVRecordFormat.h
index c507616f6e9..10a7f7220e9 100644
--- a/dbms/src/Storages/Transaction/TiKVRecordFormat.h
+++ b/dbms/src/Storages/Transaction/TiKVRecordFormat.h
@@ -154,9 +154,16 @@ inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector key
     memcpy(key.data() + 1, reinterpret_cast(&big_endian_table_id), 8);
     memcpy(key.data() + 1 + 8, RecordKVFormat::RECORD_PREFIX_SEP, 2);
     WriteBufferFromOwnString ss;
+
+    std::unordered_map<String, size_t> column_name_columns_index_map;
+    for (size_t i = 0; i < table_info.columns.size(); i++)
+    {
+        column_name_columns_index_map.emplace(table_info.columns[i].name, i);
+    }
     for (size_t i = 0; i < keys.size(); i++)
     {
-        DB::EncodeDatum(keys[i], table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset].getCodecFlag(), ss);
+        auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name];
+        DB::EncodeDatum(keys[i], table_info.columns[idx].getCodecFlag(), ss);
     }
     return encodeAsTiKVKey(key + ss.releaseStr());
 }
diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h
index 20b395a9952..34e0d3d4104 100644
--- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h
+++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h
@@ -237,14 +237,14 @@ std::pair<TableInfo, std::vector<Field>> getTableInfoAndFields(ColumnIDs handle_
     {
         table_info.is_common_handle = true;
         TiDB::IndexInfo index_info;
-        for (size_t i = 0; i < handle_ids.size(); i++)
+        for (auto handle_id : handle_ids)
         {
             TiDB::IndexColumnInfo index_column_info;
-            for (size_t pos = 0; pos < table_info.columns.size(); pos++)
+            for (auto & column : table_info.columns)
             {
-                if (table_info.columns[pos].id == handle_ids[i])
+                if (column.id == handle_id)
                 {
-                    index_column_info.offset = pos;
+                    index_column_info.name = column.name;
                     break;
                 }
             }
diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp
new file mode 100644
index 00000000000..05ab637de7f
--- /dev/null
+++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp
@@ -0,0 +1,171 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+
+#include "RowCodecTestUtils.h"
+
+using TableInfo = TiDB::TableInfo;
+namespace DB::tests
+{
+using ColumnIDs = std::vector<ColumnID>;
+class RegionBlockReaderBenchTest : public benchmark::Fixture
+{
+protected:
+    Int64 handle_value = 100;
+    UInt8 del_mark_value = 0;
+    UInt64 version_value = 100;
+
+    RegionDataReadInfoList data_list_read;
+    std::unordered_map<ColumnID, Field> fields_map;
+
+    enum RowEncodeVersion
+    {
+        RowV1,
+        RowV2
+    };
+
+protected:
+    void SetUp(const benchmark::State & /*state*/) override
+    {
+        data_list_read.clear();
+        fields_map.clear();
+    }
+
+    void encodeColumns(TableInfo & table_info, std::vector<Field> & fields, RowEncodeVersion row_version, size_t num_rows)
+    {
+        // for later check
+        std::unordered_map<String, size_t> column_name_columns_index_map;
+        for (size_t i = 0; i < table_info.columns.size(); i++)
+        {
+            fields_map.emplace(table_info.columns[i].id, fields[i]);
+            column_name_columns_index_map.emplace(table_info.columns[i].name, i);
+        }
+
+        std::vector<Field> value_fields;
+        std::vector<Field> pk_fields;
+        for (size_t i = 0; i < table_info.columns.size(); i++)
+        {
+            if (!table_info.columns[i].hasPriKeyFlag())
+                value_fields.emplace_back(fields[i]);
+            else
+                pk_fields.emplace_back(fields[i]);
+        }
+
+        // create PK
+        WriteBufferFromOwnString pk_buf;
+        if (table_info.is_common_handle)
+        {
+            auto & primary_index_info = table_info.getPrimaryIndexInfo();
+            for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++)
+            {
+                auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name];
+                EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf);
+            }
+        }
+        else
+        {
+            DB::EncodeInt64(handle_value, pk_buf);
+        }
+        RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())};
+        // create value
+        WriteBufferFromOwnString value_buf;
+        if (row_version == RowEncodeVersion::RowV1)
+        {
+            encodeRowV1(table_info, value_fields, value_buf);
+        }
+        else if (row_version == RowEncodeVersion::RowV2)
+        {
+            encodeRowV2(table_info, value_fields, value_buf);
+        }
+        else
+        {
+            throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR);
+        }
+        auto row_value = std::make_shared(std::move(value_buf.str()));
+        for (size_t i = 0; i < num_rows; i++)
+            data_list_read.emplace_back(pk, del_mark_value, version_value, row_value);
+    }
+
+    bool decodeColumns(DecodingStorageSchemaSnapshotConstPtr decoding_schema, bool force_decode) const
+    {
+        RegionBlockReader reader{decoding_schema};
+        Block block = createBlockSortByColumnID(decoding_schema);
+        return reader.read(block, data_list_read, force_decode);
+    }
+
+    std::pair<TableInfo, std::vector<Field>> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const
+    {
+        return getTableInfoAndFields(
+            handle_ids,
+            is_common_handle,
+            ColumnIDValue(2, handle_value),
+            ColumnIDValue(3, std::numeric_limits::max()),
+            ColumnIDValue(4, std::numeric_limits::min()),
+            ColumnIDValue(9, String("aaa")),
+            ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4)),
+            ColumnIDValueNull(11));
+    }
+};
+
+BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandle)
+(benchmark::State & state)
+{
+    size_t num_rows = state.range(0);
+    auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true);
+    encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows);
+    auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info);
+    for (auto _ : state)
+    {
+        decodeColumns(decoding_schema, true);
+    }
+}
+
+
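Each BENCHMARK_REGISTER_F call below expands one fixture benchmark into several runs, one per Arg value, which arrives in the body as state.range(0). A minimal standalone example of the same google-benchmark pattern (a hypothetical benchmark, not part of this patch):

```cpp
#include <benchmark/benchmark.h>
#include <vector>

// Sketch: Arg(n) feeds state.range(0); Iterations(k) pins the iteration count
// instead of letting the framework auto-tune it.
static void BM_FillVector(benchmark::State & state)
{
    for (auto _ : state)
    {
        std::vector<int> v(state.range(0), 42);
        benchmark::DoNotOptimize(v.data());
    }
}
BENCHMARK(BM_FillVector)->Iterations(1000)->Arg(1)->Arg(10)->Arg(100);
BENCHMARK_MAIN();
```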
+BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({2}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +constexpr size_t num_iterations_test = 1000; + +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); + +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp new file mode 100644 index 00000000000..1de9809ecad --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -0,0 +1,65 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +#include "RowCodecTestUtils.h" + +namespace DB::tests +{ +static TableInfo getTableInfoByJson(const String & json_table_info) +{ + return TableInfo(json_table_info); +} +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) +{ + // table with column [A,B,C,D], primary keys [A,C] + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":2,"name":{"O":"B","L":"b"},"offset":1,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":0,"Flen":20,"Decimal":0,"Charset":"utf8mb4","Collate":"utf8mb4_bin","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + //check decoding_schema->pk_column_ids infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + //check decoding_schema->pk_pos_map infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden column in the decoded block, 
+ +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndexAfterDropColumn) +{ + // drop column B from [A,B,C,D]; table with columns [A,C,D], primary keys [A,C] + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + // check decoding_schema->pk_column_ids info + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + // check decoding_schema->pk_pos_map info + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden columns in the decoded block, so the positions of A and C are 3 and 4 + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 4); +} + +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index 36a91522bb6..77aab06f6cf 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -1179,6 +1179,12 @@ void RegionKVStoreTest::testKVStore() ASSERT_EQ(e.message(), "unsupported admin command type InvalidAdmin"); } } + {
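+ // Sketch of intent (assuming region 19 was created earlier in testKVStore): the region should report pending data, and a forced flush should then succeed.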
+ // There shall be data to flush. + ASSERT_EQ(kvs.needFlushRegionData(19, ctx.getTMTContext()), true); + // Force flush until it succeeds, only for testing. + ASSERT_EQ(kvs.tryFlushRegionData(19, true, ctx.getTMTContext()), true); + } } void test_mergeresult() diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index 6a883230854..d08b4dd3738 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -26,13 +26,13 @@ using ColumnIDs = std::vector; class RegionBlockReaderTestFixture : public ::testing::Test { protected: - Int64 handle_value_ = 100; - UInt8 del_mark_value_ = 0; - UInt64 version_value_ = 100; - size_t rows_ = 3; + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + size_t rows = 3; - RegionDataReadInfoList data_list_read_; - std::unordered_map fields_map_; + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; enum RowEncodeVersion { @@ -43,8 +43,8 @@ class RegionBlockReaderTestFixture : public ::testing::Test protected: void SetUp() override { - data_list_read_.clear(); - fields_map_.clear(); + data_list_read.clear(); + fields_map.clear(); } void TearDown() override {} @@ -52,8 +52,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) { // for later check + std::unordered_map column_name_columns_index_map; for (size_t i = 0; i < table_info.columns.size(); i++) - fields_map_.emplace(table_info.columns[i].id, fields[i]); + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } std::vector value_fields; std::vector pk_fields; @@ -72,13 +76,13 @@ class RegionBlockReaderTestFixture : public ::testing::Test auto & primary_index_info = table_info.getPrimaryIndexInfo(); for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { - size_t pk_offset = primary_index_info.idx_cols[i].offset; - EncodeDatum(pk_fields[i], table_info.columns[pk_offset].getCodecFlag(), pk_buf); + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); } } else { - DB::EncodeInt64(handle_value_, pk_buf); + DB::EncodeInt64(handle_value, pk_buf); } RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; // create value @@ -96,44 +100,44 @@ class RegionBlockReaderTestFixture : public ::testing::Test throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } auto row_value = std::make_shared(std::move(value_buf.str())); - for (size_t i = 0; i < rows_; i++) - data_list_read_.emplace_back(pk, del_mark_value_, version_value_, row_value); + for (size_t i = 0; i < rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } void checkBlock(DecodingStorageSchemaSnapshotConstPtr decoding_schema, const Block & block) const { ASSERT_EQ(block.columns(), decoding_schema->column_defines->size()); - for (size_t row = 0; row < rows_; row++) + for
(size_t row = 0; row < rows; row++) { for (size_t pos = 0; pos < block.columns(); pos++) { - auto & column_element = block.getByPosition(pos); + const auto & column_element = block.getByPosition(pos); if (row == 0) { - ASSERT_EQ(column_element.column->size(), rows_); + ASSERT_EQ(column_element.column->size(), rows); } if (column_element.name == EXTRA_HANDLE_COLUMN_NAME) { if (decoding_schema->is_common_handle) { - ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read_[row]))); + ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))); } else { - ASSERT_EQ((*column_element.column)[row], Field(handle_value_)); + ASSERT_EQ((*column_element.column)[row], Field(handle_value)); } } else if (column_element.name == VERSION_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(version_value_)); + ASSERT_EQ((*column_element.column)[row], Field(version_value)); } else if (column_element.name == TAG_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value_))); + ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))); } else { - ASSERT_EQ((*column_element.column)[row], fields_map_.at(column_element.column_id)); + ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); } } } @@ -143,7 +147,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test { RegionBlockReader reader{decoding_schema}; Block block = createBlockSortByColumnID(decoding_schema); - if (!reader.read(block, data_list_read_, force_decode)) + if (!reader.read(block, data_list_read, force_decode)) return false; checkBlock(decoding_schema, block); @@ -155,7 +159,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test return getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -170,7 +174,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test handle_ids, is_common_handle, ColumnIDValue(1, String("")), - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(8, String("")), @@ -182,12 +186,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test // add default value for missing column std::vector missing_column_ids{1, 8, 13}; String missing_column_default_value = String("default"); - for (size_t i = 0; i < table_info.columns.size(); i++) + for (auto & column : table_info.columns) { - if (std::find(missing_column_ids.begin(), missing_column_ids.end(), table_info.columns[i].id) != missing_column_ids.end()) + if (std::find(missing_column_ids.begin(), missing_column_ids.end(), column.id) != missing_column_ids.end()) { - table_info.columns[i].origin_default_value = missing_column_default_value; - fields_map_.emplace(table_info.columns[i].id, Field(missing_column_default_value)); + column.origin_default_value = missing_column_default_value; + fields_map.emplace(column.id, Field(missing_column_default_value)); } } return table_info; @@ -199,7 +203,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), 
ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4))); @@ -212,7 +216,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -227,7 +231,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), diff --git a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp index 516a173b151..871153cb0e9 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp @@ -42,7 +42,7 @@ struct ParseCase std::function check; }; -TEST(TiDBTableInfo_test, ParseFromJSON) +TEST(TiDBTableInfoTest, ParseFromJSON) try { auto cases = { @@ -136,54 +136,54 @@ struct StmtCase } }; -TEST(TiDBTableInfo_test, GenCreateTableStatement) +TEST(TiDBTableInfoTest, GenCreateTableStatement) try { auto cases = // {StmtCase{ 1145, // R"json({"id":1939,"db_name":{"O":"customer","L":"customer"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":1145,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","partition":null})json", // - R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":1145,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":0}'))stmt", // + R"json({"id":1145,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","partition":null,"tiflash_mode":"fast"})json", // + R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":1145,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"fast","tiflash_replica":{"Count":0},"update_timestamp":0}'))stmt", // }, StmtCase{ 2049, // 
R"json({"id":1939,"db_name":{"O":"customer","L":"customer"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":2049,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","update_timestamp":404545295996944390,"partition":null})json", // - R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":2049,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // + R"json({"id":2049,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","update_timestamp":404545295996944390,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":2049,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // }, StmtCase{ 31, // R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":31,"name":{"O":"simple_t","L":"simple_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545295996944390,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db1`.`simple_t`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":31,"index_info":[],"is_common_handle":false,"name":{"L":"simple_t","O":"simple_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // + 
R"json({"id":31,"name":{"O":"simple_t","L":"simple_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545295996944390,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db1`.`simple_t`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":31,"index_info":[],"is_common_handle":false,"name":{"L":"simple_t","O":"simple_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // }, StmtCase{ 33, // R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":33,"name":{"O":"pk_t","L":"pk_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":3,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545312978108418,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db2`.`pk_t`(`i` Int32) Engine = DeltaMerge((`i`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":3,"Flen":11,"Tp":3}}],"comment":"","id":33,"index_info":[],"is_common_handle":false,"name":{"L":"pk_t","O":"pk_t"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545312978108418}'))stmt", // + R"json({"id":33,"name":{"O":"pk_t","L":"pk_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":3,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545312978108418,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db2`.`pk_t`(`i` Int32) Engine = DeltaMerge((`i`), 
'{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":3,"Flen":11,"Tp":3}}],"comment":"","id":33,"index_info":[],"is_common_handle":false,"name":{"L":"pk_t","O":"pk_t"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545312978108418}'))stmt", // }, StmtCase{ 35, // R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":35,"name":{"O":"not_null_t","L":"not_null_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4097,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545324922961926,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db1`.`not_null_t`(`i` Int32, `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":4097,"Flen":11,"Tp":3}}],"comment":"","id":35,"index_info":[],"is_common_handle":false,"name":{"L":"not_null_t","O":"not_null_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545324922961926}'))stmt", // + R"json({"id":35,"name":{"O":"not_null_t","L":"not_null_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4097,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545324922961926,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db1`.`not_null_t`(`i` Int32, `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":4097,"Flen":11,"Tp":3}}],"comment":"","id":35,"index_info":[],"is_common_handle":false,"name":{"L":"not_null_t","O":"not_null_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545324922961926}'))stmt", // }, StmtCase{ 37, // R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", - 
R"json({"id":37,"name":{"O":"mytable","L":"mytable"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"mycol","L":"mycol"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":4099,"Flen":256,"Decimal":0,"Charset":"utf8","Collate":"utf8_bin","Elems":null},"state":5,"comment":""}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"mycol","L":"mycol"},"offset":0,"length":-1}],"is_unique":true,"is_primary":true,"state":5,"comment":"","index_type":1}],"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":1,"update_timestamp":404566455285710853,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db2`.`mytable`(`mycol` String) Engine = DeltaMerge((`mycol`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"mycol","O":"mycol"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"utf8","Collate":"utf8_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":256,"Tp":15}}],"comment":"","id":37,"index_info":[],"is_common_handle":false,"name":{"L":"mytable","O":"mytable"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404566455285710853}'))stmt", // + R"json({"id":37,"name":{"O":"mytable","L":"mytable"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"mycol","L":"mycol"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":4099,"Flen":256,"Decimal":0,"Charset":"utf8","Collate":"utf8_bin","Elems":null},"state":5,"comment":""}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"mycol","L":"mycol"},"offset":0,"length":-1}],"is_unique":true,"is_primary":true,"state":5,"comment":"","index_type":1}],"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":1,"update_timestamp":404566455285710853,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db2`.`mytable`(`mycol` String) Engine = DeltaMerge((`mycol`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"mycol","O":"mycol"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"utf8","Collate":"utf8_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":256,"Tp":15}}],"comment":"","id":37,"index_info":[],"is_common_handle":false,"name":{"L":"mytable","O":"mytable"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404566455285710853}'))stmt", // }, StmtCase{ 32, // R"json({"id":1,"db_name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - 
R"json({"id":31,"name":{"O":"range_part_t","L":"range_part_t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","version":0}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":407445773801488390,"ShardRowIDBits":0,"partition":{"type":1,"expr":"`i`","columns":null,"enable":true,"definitions":[{"id":32,"name":{"O":"p0","L":"p0"},"less_than":["0"]},{"id":33,"name":{"O":"p1","L":"p1"},"less_than":["100"]}],"num":0},"compression":"","version":1})json", // - R"stmt(CREATE TABLE `test`.`range_part_t_32`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"belonging_table_id":31,"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":32,"index_info":[],"is_common_handle":false,"is_partition_sub_table":true,"name":{"L":"range_part_t_32","O":"range_part_t_32"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":407445773801488390}'))stmt", // + R"json({"id":31,"name":{"O":"range_part_t","L":"range_part_t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","version":0}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":407445773801488390,"ShardRowIDBits":0,"partition":{"type":1,"expr":"`i`","columns":null,"enable":true,"definitions":[{"id":32,"name":{"O":"p0","L":"p0"},"less_than":["0"]},{"id":33,"name":{"O":"p1","L":"p1"},"less_than":["100"]}],"num":0},"compression":"","version":1,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `test`.`range_part_t_32`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"belonging_table_id":31,"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":32,"index_info":[],"is_common_handle":false,"is_partition_sub_table":true,"name":{"L":"range_part_t_32","O":"range_part_t_32"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":407445773801488390}'))stmt", // }}; - for (auto & c : cases) + for (const auto & c : cases) { c.verifyTableInfo(); } diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 881ebaf88db..634e483abd2 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -104,41 +104,39 @@ Block mergeBlocks(Blocks blocks) return Block(actual_columns); } -void readBlock(BlockInputStreamPtr 
stream, const ColumnsWithTypeAndName & expect_columns) +DB::ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream) { Blocks actual_blocks; - Block except_block(expect_columns); stream->readPrefix(); while (auto block = stream->read()) { actual_blocks.push_back(block); } stream->readSuffix(); - Block actual_block = mergeBlocks(actual_blocks); - ASSERT_BLOCK_EQ(except_block, actual_block); + return mergeBlocks(actual_blocks).getColumnsWithTypeAndName(); } } // namespace -void ExecutorTest::executeStreams(const std::shared_ptr & request, std::unordered_map & source_columns_map, const ColumnsWithTypeAndName & expect_columns, size_t concurrency) +DB::ColumnsWithTypeAndName ExecutorTest::executeStreams(const std::shared_ptr & request, std::unordered_map & source_columns_map, size_t concurrency) { DAGContext dag_context(*request, "executor_test", concurrency); dag_context.setColumnsForTest(source_columns_map); context.context.setDAGContext(&dag_context); // Currently, don't care about regions information in tests. DAGQuerySource dag(context.context); - readBlock(executeQuery(dag, context.context, false, QueryProcessingStage::Complete).in, expect_columns); + return readBlock(executeQuery(dag, context.context, false, QueryProcessingStage::Complete).in); } -void ExecutorTest::executeStreams(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns, size_t concurrency) +DB::ColumnsWithTypeAndName ExecutorTest::executeStreams(const std::shared_ptr & request, size_t concurrency) { - executeStreams(request, context.executorIdColumnsMap(), expect_columns, concurrency); + return executeStreams(request, context.executorIdColumnsMap(), concurrency); } -void ExecutorTest::executeStreamsWithSingleSource(const std::shared_ptr & request, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns, SourceType type, size_t concurrency) +DB::ColumnsWithTypeAndName ExecutorTest::executeStreamsWithSingleSource(const std::shared_ptr & request, const ColumnsWithTypeAndName & source_columns, SourceType type, size_t concurrency) { std::unordered_map source_columns_map; source_columns_map[getSourceName(type)] = source_columns; - executeStreams(request, source_columns_map, expect_columns, concurrency); + return executeStreams(request, source_columns_map, concurrency); } void ExecutorTest::dagRequestEqual(const String & expected_string, const std::shared_ptr & actual) diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 87bb7115bed..59b829e04b5 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -25,6 +25,9 @@ namespace DB::tests { void executeInterpreter(const std::shared_ptr & request, Context & context); + +::testing::AssertionResult check_columns_equality(const ColumnsWithTypeAndName & expected, const ColumnsWithTypeAndName & actual, bool _restrict); + class ExecutorTest : public ::testing::Test { protected: @@ -72,20 +75,17 @@ class ExecutorTest : public ::testing::Test } } - void executeStreams( + ColumnsWithTypeAndName executeStreams( const std::shared_ptr & request, std::unordered_map & source_columns_map, - const ColumnsWithTypeAndName & expect_columns, size_t concurrency = 1); - void executeStreams( + ColumnsWithTypeAndName executeStreams( const std::shared_ptr & request, - const ColumnsWithTypeAndName & expect_columns, size_t concurrency = 1); - void executeStreamsWithSingleSource( + ColumnsWithTypeAndName executeStreamsWithSingleSource( const std::shared_ptr & 
request, const ColumnsWithTypeAndName & source_columns, - const ColumnsWithTypeAndName & expect_columns, SourceType type = TableScan, size_t concurrency = 1); @@ -96,4 +96,4 @@ class ExecutorTest : public ::testing::Test #define ASSERT_DAGREQUEST_EQAUL(str, request) dagRequestEqual((str), (request)); #define ASSERT_BLOCKINPUTSTREAM_EQAUL(str, request, concurrency) executeInterpreter((str), (request), (concurrency)) -} // namespace DB::tests \ No newline at end of file +} // namespace DB::tests diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index 637fbf51c00..1c8b0242bfa 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -13,7 +13,9 @@ // limitations under the License. #include +#include #include +#include #include #include #include @@ -23,7 +25,10 @@ #include #include #include -#include + +#include +#include + namespace DB { @@ -103,21 +108,118 @@ ::testing::AssertionResult columnEqual( return columnEqual(expected.column, actual.column); } -void blockEqual( +::testing::AssertionResult blockEqual( const Block & expected, const Block & actual) { size_t columns = actual.columns(); + size_t expected_columns = expected.columns(); - ASSERT_TRUE(expected.columns() == columns); + ASSERT_EQUAL(expected_columns, columns, "Block size mismatch"); for (size_t i = 0; i < columns; ++i) { const auto & expected_col = expected.getByPosition(i); const auto & actual_col = actual.getByPosition(i); - ASSERT_TRUE(actual_col.type->getName() == expected_col.type->getName()); - ASSERT_COLUMN_EQ(expected_col.column, actual_col.column); + + auto cmp_res = columnEqual(expected_col, actual_col); + if (!cmp_res) + return cmp_res; + } + return ::testing::AssertionSuccess(); +} + +/// size of each column should be the same +std::multiset columnsToRowSet(const ColumnsWithTypeAndName & cols) +{ + if (cols.empty()) + return {}; + if (cols[0].column->empty()) + return {}; + + size_t cols_size = cols.size(); + std::vector rows{cols[0].column->size()}; + + for (auto & r : rows) + { + r.resize(cols_size, true); + } + + for (auto const & [col_id, col] : ext::enumerate(cols)) + { + for (size_t i = 0, size = col.column->size(); i < size; ++i) + { + new (rows[i].place(col_id)) Field((*col.column)[i]); + } + } + return {std::make_move_iterator(rows.begin()), std::make_move_iterator(rows.end())}; +} + +::testing::AssertionResult columnsEqual( + const ColumnsWithTypeAndName & expected, + const ColumnsWithTypeAndName & actual, + bool _restrict) +{ + if (_restrict) + return blockEqual(Block(expected), Block(actual)); + + auto expect_cols_size = expected.size(); + auto actual_cols_size = actual.size(); + + ASSERT_EQUAL(expect_cols_size, actual_cols_size, "Columns size mismatch"); + + for (size_t i = 0; i < expect_cols_size; ++i) + { + auto const & expect_col = expected[i]; + auto const & actual_col = actual[i]; + ASSERT_EQUAL(expect_col.column->getName(), actual_col.column->getName(), fmt::format("Column {} name mismatch", i)); + ASSERT_EQUAL(expect_col.column->size(), actual_col.column->size(), fmt::format("Column {} size mismatch", i)); + auto type_eq = dataTypeEqual(expected[i].type, actual[i].type); + if (!type_eq) + return type_eq; + } + + auto const expected_row_set = columnsToRowSet(expected); + auto const actual_row_set = columnsToRowSet(actual); + + if (expected_row_set != actual_row_set) + { + FmtBuffer buf; + + auto expect_it = expected_row_set.begin(); + auto actual_it = actual_row_set.begin(); + + buf.append("Columns row set 
mismatch\n").append("expected_row_set:\n"); + for (; expect_it != expected_row_set.end(); ++expect_it, ++actual_it) + { + buf.joinStr( + expect_it->begin(), + expect_it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + if (*expect_it != *actual_it) + break; + } + + ++actual_it; + + buf.append("...\nactual_row_set:\n"); + for (auto it = actual_row_set.begin(); it != actual_it; ++it) + { + buf.joinStr( + it->begin(), + it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + } + buf.append("...\n"); + + return testing::AssertionFailure() << buf.toString(); } + + return testing::AssertionSuccess(); } std::pair buildFunction( @@ -274,5 +376,64 @@ ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector(fsp)); return {makeColumn>(data_type, vec), data_type, name, 0}; } + +String getColumnsContent(const ColumnsWithTypeAndName & cols) +{ + if (cols.size() <= 0) + return ""; + return getColumnsContent(cols, 0, cols[0].column->size() - 1); +} + +String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size_t end) +{ + const size_t col_num = cols.size(); + if (col_num <= 0) + return ""; + + const size_t col_size = cols[0].column->size(); + assert(begin <= end); + assert(col_size > end); + assert(col_size > begin); + + bool is_same = true; + + for (size_t i = 1; i < col_num; ++i) + { + if (cols[i].column->size() != col_size) + is_same = false; + } + + assert(is_same); /// Ensure the sizes of columns in cols are the same + + std::vector> col_content; + FmtBuffer fmt_buf; + for (size_t i = 0; i < col_num; ++i) + { + /// Push the column name + fmt_buf.append(fmt::format("{}: (", cols[i].name)); + for (size_t j = begin; j <= end; ++j) + col_content.push_back(std::make_pair(j, (*cols[i].column)[j].toString())); + + /// Add content + fmt_buf.joinStr( + col_content.begin(), + col_content.end(), + [](const auto & content, FmtBuffer & fmt_buf) { + fmt_buf.append(fmt::format("{}: {}", content.first, content.second)); + }, + ", "); + + fmt_buf.append(")\n"); + col_content.clear(); + } + + return fmt_buf.toString(); +} + +ColumnsWithTypeAndName createColumns(const ColumnsWithTypeAndName & cols) +{ + return cols; +} + } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index d6b7351df05..8680d1886b1 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -514,6 +514,17 @@ ColumnWithTypeAndName createConstColumn( return createConstColumn(data_type_args, size, InferredFieldType(std::nullopt), name); } +String getColumnsContent(const ColumnsWithTypeAndName & cols); + +/// We can designate the range of columns printed with begin and end. range: [begin, end] +String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size_t end); + +// This wrapper function only serves to construct columns input for function-like macros, +// since preprocessor recognizes `{col1, col2, col3}` as three arguments instead of one. +// E.g. preprocessor does not allow us to write `ASSERT_COLUMNS_EQ_R({col1, col2, col3}, actual_cols)`, +// but with this func we can write `ASSERT_COLUMNS_EQ_R(createColumns{col1, col2, col3}, actual_cols)` instead. 
+ColumnsWithTypeAndName createColumns(const ColumnsWithTypeAndName & cols); + ::testing::AssertionResult dataTypeEqual( const DataTypePtr & expected, const DataTypePtr & actual); @@ -527,10 +538,15 @@ ::testing::AssertionResult columnEqual( const ColumnWithTypeAndName & expected, const ColumnWithTypeAndName & actual); -void blockEqual( +::testing::AssertionResult blockEqual( const Block & expected, const Block & actual); +::testing::AssertionResult columnsEqual( + const ColumnsWithTypeAndName & expected, + const ColumnsWithTypeAndName & actual, + bool _restrict); + ColumnWithTypeAndName executeFunction( Context & context, const String & func_name, @@ -756,5 +772,10 @@ class FunctionTest : public ::testing::Test #define ASSERT_COLUMN_EQ(expected, actual) ASSERT_TRUE(DB::tests::columnEqual((expected), (actual))) #define ASSERT_BLOCK_EQ(expected, actual) DB::tests::blockEqual((expected), (actual)) + +/// Strictly check columns equality: both the data set and each row's offset should be the same +#define ASSERT_COLUMNS_EQ_R(expected, actual) ASSERT_TRUE(DB::tests::columnsEqual((expected), (actual), true)) +/// Loosely check columns equality: only check that the data sets are equal +#define ASSERT_COLUMNS_EQ_UR(expected, actual) ASSERT_TRUE(DB::tests::columnsEqual((expected), (actual), false)) } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/TiFlashTestEnv.cpp b/dbms/src/TestUtils/TiFlashTestEnv.cpp index cbd42b57550..f44298cbafd 100644 --- a/dbms/src/TestUtils/TiFlashTestEnv.cpp +++ b/dbms/src/TestUtils/TiFlashTestEnv.cpp @@ -39,6 +39,11 @@ void TiFlashTestEnv::initializeGlobalContext(Strings testdata_path, PageStorageR KeyManagerPtr key_manager = std::make_shared(false); global_context->initializeFileProvider(key_manager, false); + // initialize background & blockable background thread pool + Settings & settings = global_context->getSettingsRef(); + global_context->initializeBackgroundPool(settings.background_pool_size); + global_context->initializeBlockableBackgroundPool(settings.background_pool_size); + // These global variables should be initialized in the following order // 1. capacity // 2. path pool diff --git a/dbms/src/TestUtils/bench_dbms_main.cpp b/dbms/src/TestUtils/bench_dbms_main.cpp index 48bd02a71f7..092c45c35e2 100644 --- a/dbms/src/TestUtils/bench_dbms_main.cpp +++ b/dbms/src/TestUtils/bench_dbms_main.cpp @@ -20,6 +20,8 @@ int main(int argc, char * argv[]) { benchmark::Initialize(&argc, argv); DB::tests::TiFlashTestEnv::setupLogger(); + // Each time TiFlashTestEnv::getContext() is called, some logs are printed, which is annoying.
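+ // Raising the root logger to "error" suppresses those lower-severity messages for the whole benchmark binary; a benchmark that needs the output can lower the level again.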
+ Poco::Logger::root().setLevel("error"); DB::tests::TiFlashTestEnv::initializeGlobalContext(); if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 2cf8a939b58..30d05786c9a 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -35,7 +35,7 @@ ASTPtr buildLiteral(const Field & field) return std::make_shared(field); } -ASTPtr buildOrderByItemList(MockOrderByItems order_by_items) +ASTPtr buildOrderByItemVec(MockOrderByItemVec order_by_items) { std::vector vec(order_by_items.size()); size_t i = 0; @@ -92,7 +92,7 @@ std::shared_ptr DAGRequestBuilder::build(MockDAGRequestContext return dag_request_ptr; } -DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String & table, const MockColumnInfos & columns) +DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String & table, const MockColumnInfoVec & columns) { assert(!columns.empty()); TableInfo table_info; @@ -114,27 +114,17 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String return *this; } -DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfos & columns) +DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfoVec & columns) { return mockTable(name.first, name.second, columns); } -DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfoList & columns) +DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count) { - return mockTable(name.first, name.second, columns); -} - -DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfos & columns) -{ - return buildExchangeReceiver(columns); + return buildExchangeReceiver(columns, fine_grained_shuffle_stream_count); } -DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfoList & columns) -{ - return buildExchangeReceiver(columns); -} - -DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInfos & columns) +DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count) { DAGSchema schema; for (const auto & column : columns) @@ -145,7 +135,7 @@ DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInf schema.push_back({column.first, info}); } - root = compileExchangeReceiver(getExecutorIndex(), schema); + root = compileExchangeReceiver(getExecutorIndex(), schema, fine_grained_shuffle_stream_count); return *this; } @@ -180,33 +170,23 @@ DAGRequestBuilder & DAGRequestBuilder::topN(ASTPtr order_exprs, ASTPtr limit_exp DAGRequestBuilder & DAGRequestBuilder::topN(const String & col_name, bool desc, int limit) { assert(root); - root = compileTopN(root, getExecutorIndex(), buildOrderByItemList({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemVec({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); return *this; } -DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, int limit) +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItemVec order_by_items, int limit) { return topN(order_by_items, buildLiteral(Field(static_cast(limit)))); } -DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, ASTPtr 
limit_expr) -{ - assert(root); - root = compileTopN(root, getExecutorIndex(), buildOrderByItemList(order_by_items), limit_expr); - return *this; -} - -DAGRequestBuilder & DAGRequestBuilder::project(const String & col_name) +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItemVec order_by_items, ASTPtr limit_expr) { assert(root); - auto exp_list = std::make_shared(); - exp_list->children.push_back(buildColumn(col_name)); - - root = compileProject(root, getExecutorIndex(), exp_list); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemVec(order_by_items), limit_expr); return *this; } -DAGRequestBuilder & DAGRequestBuilder::project(MockAsts exprs) +DAGRequestBuilder & DAGRequestBuilder::project(MockAstVec exprs) { assert(root); auto exp_list = std::make_shared(); @@ -218,7 +198,7 @@ DAGRequestBuilder & DAGRequestBuilder::project(MockAsts exprs) return *this; } -DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNames col_names) +DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNameVec col_names) { assert(root); auto exp_list = std::make_shared(); @@ -237,12 +217,12 @@ DAGRequestBuilder & DAGRequestBuilder::exchangeSender(tipb::ExchangeType exchang return *this; } -DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAsts exprs) +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAstVec exprs) { return join(right, exprs, ASTTableJoin::Kind::Inner); } -DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAsts exprs, ASTTableJoin::Kind kind) +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAstVec exprs, ASTTableJoin::Kind kind) { assert(root); assert(right.root); @@ -268,7 +248,7 @@ DAGRequestBuilder & DAGRequestBuilder::aggregation(ASTPtr agg_func, ASTPtr group return buildAggregation(agg_funcs, group_by_exprs); } -DAGRequestBuilder & DAGRequestBuilder::aggregation(MockAsts agg_funcs, MockAsts group_by_exprs) +DAGRequestBuilder & DAGRequestBuilder::aggregation(MockAstVec agg_funcs, MockAstVec group_by_exprs) { auto agg_func_list = std::make_shared(); auto group_by_expr_list = std::make_shared(); @@ -286,85 +266,63 @@ DAGRequestBuilder & DAGRequestBuilder::buildAggregation(ASTPtr agg_funcs, ASTPtr return *this; } -DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame) +DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) { assert(root); auto window_func_list = std::make_shared(); window_func_list->children.push_back(window_func); - root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemList({partition_by}), buildOrderByItemList({order_by}), frame); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemVec({partition_by}), buildOrderByItemVec({order_by}), frame, fine_grained_shuffle_stream_count); return *this; } -DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame) +DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) { assert(root); auto window_func_list = std::make_shared(); 
window_func_list->children.push_back(window_func); - root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemList(partition_by_list), buildOrderByItemList(order_by_list), frame); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemVec(partition_by_vec), buildOrderByItemVec(order_by_vec), frame, fine_grained_shuffle_stream_count); return *this; } -DAGRequestBuilder & DAGRequestBuilder::window(MockAsts window_funcs, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame) +DAGRequestBuilder & DAGRequestBuilder::window(MockAstVec window_funcs, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) { assert(root); auto window_func_list = std::make_shared(); for (const auto & func : window_funcs) window_func_list->children.push_back(func); - root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemList(partition_by_list), buildOrderByItemList(order_by_list), frame); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemVec(partition_by_vec), buildOrderByItemVec(order_by_vec), frame, fine_grained_shuffle_stream_count); return *this; } -DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItem order_by, bool is_partial_sort) +DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) { assert(root); - root = compileSort(root, getExecutorIndex(), buildOrderByItemList({order_by}), is_partial_sort); + root = compileSort(root, getExecutorIndex(), buildOrderByItemVec({order_by}), is_partial_sort, fine_grained_shuffle_stream_count); return *this; } -DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItems order_by_list, bool is_partial_sort) +DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) { assert(root); - root = compileSort(root, getExecutorIndex(), buildOrderByItemList(order_by_list), is_partial_sort); + root = compileSort(root, getExecutorIndex(), buildOrderByItemVec(order_by_vec), is_partial_sort, fine_grained_shuffle_stream_count); return *this; } -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos) -{ - std::vector v_column_info(columnInfos.size()); - size_t i = 0; - for (const auto & info : columnInfos) - { - v_column_info[i++] = std::move(info); - } - mock_tables[name.first + "." + name.second] = v_column_info; -} - -void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfos & columnInfos) +void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos) { mock_tables[db + "." + table] = columnInfos; } -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos) +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos) { mock_tables[name.first + "." 
+ name.second] = columnInfos; } -void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfos & columnInfos) +void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfoVec & columnInfos) { exchange_schemas[name] = columnInfos; } -void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfoList & columnInfos) -{ - std::vector v_column_info(columnInfos.size()); - size_t i = 0; - for (const auto & info : columnInfos) - { - v_column_info[i++] = std::move(info); - } - exchange_schemas[name] = v_column_info; -} - void MockDAGRequestContext::addMockTableColumnData(const String & db, const String & table, ColumnsWithTypeAndName columns) { mock_table_columns[db + "." + table] = columns; @@ -380,37 +338,19 @@ void MockDAGRequestContext::addExchangeReceiverColumnData(const String & name, C mock_exchange_columns[name] = columns; } -void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns) +void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns) { addMockTable(db, table, columnInfos); addMockTableColumnData(db, table, columns); } -void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfos & columnInfos, ColumnsWithTypeAndName columns) -{ - addMockTable(db, table, columnInfos); - addMockTableColumnData(db, table, columns); -} - -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns) +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns) { addMockTable(name, columnInfos); addMockTableColumnData(name, columns); } -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos, ColumnsWithTypeAndName columns) -{ - addMockTable(name, columnInfos); - addMockTableColumnData(name, columns); -} - -void MockDAGRequestContext::addExchangeReceiver(const String & name, MockColumnInfos columnInfos, ColumnsWithTypeAndName columns) -{ - addExchangeRelationSchema(name, columnInfos); - addExchangeReceiverColumnData(name, columns); -} - -void MockDAGRequestContext::addExchangeReceiver(const String & name, MockColumnInfoList columnInfos, ColumnsWithTypeAndName columns) +void MockDAGRequestContext::addExchangeReceiver(const String & name, MockColumnInfoVec columnInfos, ColumnsWithTypeAndName columns) { addExchangeRelationSchema(name, columnInfos); addExchangeReceiverColumnData(name, columns); @@ -428,9 +368,9 @@ DAGRequestBuilder MockDAGRequestContext::scan(String db_name, String table_name) return builder; } -DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name) +DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name, uint64_t fine_grained_shuffle_stream_count) { - auto builder = DAGRequestBuilder(index).exchangeReceiver(exchange_schemas[exchange_name]); + auto builder = DAGRequestBuilder(index).exchangeReceiver(exchange_schemas[exchange_name], fine_grained_shuffle_stream_count); receiver_source_task_ids_map[builder.getRoot()->name] = {}; // If there are no related columns, the user must pass input columns as an argument of executeStreams in order to run Executor tests. // If the user doesn't want to test executors, it is safe to run Interpreter tests.
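// Typical flow (hypothetical name): call addExchangeReceiver("exchange1", column_infos, columns) first, then receive("exchange1") here to build a receiver executor with that schema.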
@@ -440,5 +380,4 @@ DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name) } return builder; } - } // namespace DB::tests diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index c11635ac93e..8b5a6d300ff 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -23,15 +23,14 @@ namespace DB::tests { using MockColumnInfo = std::pair<String, TiDB::TP>; -using MockColumnInfos = std::vector<MockColumnInfo>; -using MockColumnInfoList = std::initializer_list<MockColumnInfo>; +using MockColumnInfoVec = std::vector<MockColumnInfo>; using MockTableName = std::pair<String, String>; using MockOrderByItem = std::pair<String, bool>; -using MockOrderByItems = std::initializer_list<MockOrderByItem>; +using MockOrderByItemVec = std::vector<MockOrderByItem>; using MockPartitionByItem = std::pair<String, bool>; -using MockPartitionByItems = std::initializer_list<MockPartitionByItem>; -using MockColumnNames = std::initializer_list<String>; -using MockAsts = std::initializer_list<ASTPtr>; +using MockPartitionByItemVec = std::vector<MockPartitionByItem>; +using MockColumnNameVec = std::vector<String>; +using MockAstVec = std::vector<ASTPtr>; using MockWindowFrame = mock::MockWindowFrame; class MockDAGRequestContext; @@ -64,12 +63,10 @@ class DAGRequestBuilder std::shared_ptr<tipb::DAGRequest> build(MockDAGRequestContext & mock_context); - DAGRequestBuilder & mockTable(const String & db, const String & table, const MockColumnInfos & columns); - DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfos & columns); - DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfoList & columns); + DAGRequestBuilder & mockTable(const String & db, const String & table, const MockColumnInfoVec & columns); + DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfoVec & columns); - DAGRequestBuilder & exchangeReceiver(const MockColumnInfos & columns); - DAGRequestBuilder & exchangeReceiver(const MockColumnInfoList & columns); + DAGRequestBuilder & exchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count = 0); DAGRequestBuilder & filter(ASTPtr filter_expr); @@ -78,35 +75,34 @@ class DAGRequestBuilder DAGRequestBuilder & topN(ASTPtr order_exprs, ASTPtr limit_expr); DAGRequestBuilder & topN(const String & col_name, bool desc, int limit); - DAGRequestBuilder & topN(MockOrderByItems order_by_items, int limit); - DAGRequestBuilder & topN(MockOrderByItems order_by_items, ASTPtr limit_expr); + DAGRequestBuilder & topN(MockOrderByItemVec order_by_items, int limit); + DAGRequestBuilder & topN(MockOrderByItemVec order_by_items, ASTPtr limit_expr); - DAGRequestBuilder & project(const String & col_name); - DAGRequestBuilder & project(MockAsts expr); - DAGRequestBuilder & project(MockColumnNames col_names); + DAGRequestBuilder & project(MockAstVec exprs); + DAGRequestBuilder & project(MockColumnNameVec col_names); DAGRequestBuilder & exchangeSender(tipb::ExchangeType exchange_type); - // Currentlt only support inner join, left join and right join. + // Currently only support inner join, left join and right join. // TODO support more types of join.
- DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAsts exprs); - DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAsts exprs, ASTTableJoin::Kind kind); + DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAstVec exprs); + DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAstVec exprs, ASTTableJoin::Kind kind); // aggregation DAGRequestBuilder & aggregation(ASTPtr agg_func, ASTPtr group_by_expr); - DAGRequestBuilder & aggregation(MockAsts agg_funcs, MockAsts group_by_exprs); + DAGRequestBuilder & aggregation(MockAstVec agg_funcs, MockAstVec group_by_exprs); // window - DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame); - DAGRequestBuilder & window(MockAsts window_funcs, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame); - DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItems order_by_list, MockPartitionByItems partition_by_list, MockWindowFrame frame); - DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort); - DAGRequestBuilder & sort(MockOrderByItems order_by_list, bool is_partial_sort); + DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & window(MockAstVec window_funcs, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); private: void initDAGRequest(tipb::DAGRequest & dag_request); DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs); - DAGRequestBuilder & buildExchangeReceiver(const MockColumnInfos & columns); + DAGRequestBuilder & buildExchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count = 0); ExecutorPtr root; DAGProperties properties; @@ -130,30 +126,25 @@ class MockDAGRequestContext return DAGRequestBuilder(index); } - void addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos); - void addMockTable(const String & db, const String & table, const MockColumnInfos & columnInfos); - void addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos); - void addExchangeRelationSchema(String name, const MockColumnInfos & columnInfos); - void addExchangeRelationSchema(String name, const MockColumnInfoList & columnInfos); + void addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos); + void addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos); + void addExchangeRelationSchema(String name, const MockColumnInfoVec & columnInfos); void addMockTableColumnData(const String & db, const String & table, ColumnsWithTypeAndName columns); - void addMockTable(const String & db, const String & table, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns); - void addMockTable(const String & db, const String & table, const MockColumnInfos & 
columnInfos, ColumnsWithTypeAndName columns); - void addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns); - void addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos, ColumnsWithTypeAndName columns); + void addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns); + void addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns); void addMockTableColumnData(const MockTableName & name, ColumnsWithTypeAndName columns); void addExchangeReceiverColumnData(const String & name, ColumnsWithTypeAndName columns); - void addExchangeReceiver(const String & name, MockColumnInfos columnInfos, ColumnsWithTypeAndName columns); - void addExchangeReceiver(const String & name, MockColumnInfoList columnInfos, ColumnsWithTypeAndName columns); + void addExchangeReceiver(const String & name, MockColumnInfoVec columnInfos, ColumnsWithTypeAndName columns); std::unordered_map<String, ColumnsWithTypeAndName> & executorIdColumnsMap() { return executor_id_columns_map; } DAGRequestBuilder scan(String db_name, String table_name); - DAGRequestBuilder receive(String exchange_name); + DAGRequestBuilder receive(String exchange_name, uint64_t fine_grained_shuffle_stream_count = 0); private: size_t index; - std::unordered_map<String, MockColumnInfos> mock_tables; - std::unordered_map<String, MockColumnInfos> exchange_schemas; + std::unordered_map<String, MockColumnInfoVec> mock_tables; + std::unordered_map<String, MockColumnInfoVec> exchange_schemas; std::unordered_map<String, ColumnsWithTypeAndName> mock_table_columns; std::unordered_map<String, ColumnsWithTypeAndName> mock_exchange_columns; std::unordered_map<String, ColumnsWithTypeAndName> executor_id_columns_map; /// @@ -168,21 +159,23 @@ class MockDAGRequestContext ASTPtr buildColumn(const String & column_name); ASTPtr buildLiteral(const Field & field); -ASTPtr buildFunction(MockAsts exprs, const String & name); -ASTPtr buildOrderByItemList(MockOrderByItems order_by_items); +ASTPtr buildFunction(MockAstVec exprs, const String & name); +ASTPtr buildOrderByItemVec(MockOrderByItemVec order_by_items); MockWindowFrame buildDefaultRowsFrame(); #define col(name) buildColumn((name)) #define lit(field) buildLiteral((field)) +#define concat(expr1, expr2) makeASTFunction("concat", (expr1), (expr2)) #define eq(expr1, expr2) makeASTFunction("equals", (expr1), (expr2)) #define Not_eq(expr1, expr2) makeASTFunction("notEquals", (expr1), (expr2)) #define lt(expr1, expr2) makeASTFunction("less", (expr1), (expr2)) #define gt(expr1, expr2) makeASTFunction("greater", (expr1), (expr2)) #define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2)) #define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2)) -#define NOT(expr) makeASTFunction("not", (expr1), (expr2)) -#define Max(expr) makeASTFunction("max", expr) +#define NOT(expr) makeASTFunction("not", (expr)) +#define Max(expr) makeASTFunction("max", (expr)) +#define Sum(expr) makeASTFunction("sum", (expr)) /// Window functions #define RowNumber() makeASTFunction("RowNumber") #define Rank() makeASTFunction("Rank") diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 8bed0f2fc6c..72f0bb505d1 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -76,7 +76,7 @@ TEST_F(MockDAGRequestTest, Projection) try { auto request = context.scan("test_db", "test_table") - .project("s1") + .project({"s1"}) .build(context); { String expected = "project_1 | {<0, String>}\n" diff --git a/dbms/src/TestUtils/tests/gtest_print_columns.cpp
b/dbms/src/TestUtils/tests/gtest_print_columns.cpp new file mode 100644 index 00000000000..50631fc4f4a --- /dev/null +++ b/dbms/src/TestUtils/tests/gtest_print_columns.cpp @@ -0,0 +1,57 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class PrintColumnsTest : public DB::tests::ExecutorTest +{ +public: + using ColStringType = std::optional<typename TypeTraits<String>::FieldType>; + using ColInt32Type = std::optional<typename TypeTraits<Int32>::FieldType>; + using ColumnWithString = std::vector<ColStringType>; + using ColumnWithInt32 = std::vector<ColInt32Type>; + + void initializeContext() override + { + test_cols.push_back(toNullableVec<Int32>("col1", ColumnWithInt32{36, 34, 32, 27, {}, {}})); + test_cols.push_back(toNullableVec<String>("col2", ColumnWithString{"female", "male", "male", "female", "male", "female"})); + col_len = test_cols[0].column->size(); + } + + ColumnsWithTypeAndName test_cols; + size_t col_len; + const String result1{"col1: (0: Int64_36, 1: Int64_34, 2: Int64_32, 3: Int64_27, 4: NULL, 5: NULL)\ncol2: (0: 'female', 1: 'male', 2: 'male', 3: 'female', 4: 'male', 5: 'female')\n"}; + const String result2{"col1: (0: Int64_36, 1: Int64_34, 2: Int64_32, 3: Int64_27, 4: NULL, 5: NULL)\ncol2: (0: 'female', 1: 'male', 2: 'male', 3: 'female', 4: 'male', 5: 'female')\n"}; + const String result3{"col1: (0: Int64_36)\ncol2: (0: 'female')\n"}; + const String result4{"col1: (1: Int64_34, 2: Int64_32, 3: Int64_27, 4: NULL)\ncol2: (1: 'male', 2: 'male', 3: 'female', 4: 'male')\n"}; +}; + +TEST_F(PrintColumnsTest, SimpleTest) +try +{ + EXPECT_EQ(getColumnsContent(test_cols), result1); + EXPECT_EQ(getColumnsContent(test_cols, 0, col_len - 1), result2); + EXPECT_EQ(getColumnsContent(test_cols, 0, 0), result3); + EXPECT_EQ(getColumnsContent(test_cols, 1, col_len - 2), result4); +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/TiDB/Schema/SchemaBuilder.cpp b/dbms/src/TiDB/Schema/SchemaBuilder.cpp index ae78923fc61..6e4ad10e344 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder.cpp +++ b/dbms/src/TiDB/Schema/SchemaBuilder.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -320,7 +321,7 @@ inline SchemaChanges detectSchemaChanges( } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyAlterPhysicalTable(DBInfoPtr db_info, TableInfoPtr table_info, ManageableStoragePtr storage) +void SchemaBuilder<Getter, NameMapper>::applyAlterPhysicalTable(const DBInfoPtr & db_info, const TableInfoPtr & table_info, const ManageableStoragePtr & storage) { LOG_FMT_INFO(log, "Altering table {}", name_mapper.debugCanonicalName(*db_info, *table_info)); @@ -394,7 +395,7 @@ void SchemaBuilder<Getter, NameMapper>::applyAlterPhysicalTable(DBInfoPtr db_inf } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyAlterTable(DBInfoPtr db_info, TableID table_id) +void SchemaBuilder<Getter, NameMapper>::applyAlterTable(const DBInfoPtr & db_info, TableID table_id) { auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) @@ -413,7 +414,7 @@ void SchemaBuilder<Getter, NameMapper>::applyAlterTable(DBInfoPtr db_info, Table } template <typename Getter, typename NameMapper> -void
SchemaBuilder<Getter, NameMapper>::applyAlterLogicalTable(DBInfoPtr db_info, TableInfoPtr table_info, ManageableStoragePtr storage) +void SchemaBuilder<Getter, NameMapper>::applyAlterLogicalTable(const DBInfoPtr & db_info, const TableInfoPtr & table_info, const ManageableStoragePtr & storage) { // Alter logical table first. applyAlterPhysicalTable(db_info, table_info, storage); @@ -542,6 +543,11 @@ void SchemaBuilder<Getter, NameMapper>::applyDiff(const SchemaDiff & diff) applySetTiFlashReplica(db_info, diff.table_id); break; } + case SchemaActionType::SetTiFlashMode: + { + applySetTiFlashMode(db_info, diff.table_id); + break; + } default: { if (diff.type < SchemaActionType::MaxRecognizedType) @@ -569,7 +575,7 @@ void SchemaBuilder<Getter, NameMapper>::applyDiff(const SchemaDiff & diff) } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyPartitionDiff(TiDB::DBInfoPtr db_info, TableID table_id) +void SchemaBuilder<Getter, NameMapper>::applyPartitionDiff(const TiDB::DBInfoPtr & db_info, TableID table_id) { auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) @@ -593,7 +599,7 @@ void SchemaBuilder<Getter, NameMapper>::applyPartitionDiff(TiDB::DBInfoPtr db_in } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyPartitionDiff(TiDB::DBInfoPtr db_info, TableInfoPtr table_info, ManageableStoragePtr storage) +void SchemaBuilder<Getter, NameMapper>::applyPartitionDiff(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage) { const auto & orig_table_info = storage->getTableInfo(); if (!orig_table_info.isLogicalPartitionTable()) @@ -659,7 +665,7 @@ void SchemaBuilder<Getter, NameMapper>::applyPartitionDiff(TiDB::DBInfoPtr db_in } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyRenameTable(DBInfoPtr new_db_info, TableID table_id) +void SchemaBuilder<Getter, NameMapper>::applyRenameTable(const DBInfoPtr & new_db_info, TableID table_id) { auto new_table_info = getter.getTableInfo(new_db_info->id, table_id); if (new_table_info == nullptr) @@ -679,9 +685,9 @@ void SchemaBuilder<Getter, NameMapper>::applyRenameTable(DBInfoPtr new_db_info, template <typename Getter, typename NameMapper> void SchemaBuilder<Getter, NameMapper>::applyRenameLogicalTable( - DBInfoPtr new_db_info, - TableInfoPtr new_table_info, - ManageableStoragePtr storage) + const DBInfoPtr & new_db_info, + const TableInfoPtr & new_table_info, + const ManageableStoragePtr & storage) { applyRenamePhysicalTable(new_db_info, *new_table_info, storage); @@ -703,9 +709,9 @@ void SchemaBuilder<Getter, NameMapper>::applyRenameLogicalTable( template <typename Getter, typename NameMapper> void SchemaBuilder<Getter, NameMapper>::applyRenamePhysicalTable( - DBInfoPtr new_db_info, - TableInfo & new_table_info, - ManageableStoragePtr storage) + const DBInfoPtr & new_db_info, + const TableInfo & new_table_info, + const ManageableStoragePtr & storage) { const auto old_mapped_db_name = storage->getDatabaseName(); const auto new_mapped_db_name = name_mapper.mapDatabaseName(*new_db_info); @@ -908,7 +914,7 @@ String createDatabaseStmt(Context & context, const DBInfo & db_info, const Schem } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyCreateSchema(TiDB::DBInfoPtr db_info) +void SchemaBuilder<Getter, NameMapper>::applyCreateSchema(const TiDB::DBInfoPtr & db_info) { GET_METRIC(tiflash_schema_internal_ddl_count, type_create_db).Increment(); LOG_FMT_INFO(log, "Creating database {}", name_mapper.debugDatabaseName(*db_info)); @@ -1047,7 +1053,7 @@ String createTableStmt( } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyCreatePhysicalTable(DBInfoPtr db_info, TableInfoPtr table_info) +void SchemaBuilder<Getter, NameMapper>::applyCreatePhysicalTable(const DBInfoPtr & db_info, const TableInfoPtr & table_info) { GET_METRIC(tiflash_schema_internal_ddl_count, type_create_table).Increment(); LOG_FMT_INFO(log, "Creating table {}", name_mapper.debugCanonicalName(*db_info, *table_info)); @@ -1109,7 +1115,7 @@ void
SchemaBuilder<Getter, NameMapper>::applyCreatePhysicalTable(DBInfoPtr db_in } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyCreateTable(TiDB::DBInfoPtr db_info, TableID table_id) +void SchemaBuilder<Getter, NameMapper>::applyCreateTable(const TiDB::DBInfoPtr & db_info, TableID table_id) { auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) @@ -1123,7 +1129,7 @@ void SchemaBuilder<Getter, NameMapper>::applyCreateTable(TiDB::DBInfoPtr db_info } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyCreateLogicalTable(TiDB::DBInfoPtr db_info, TableInfoPtr table_info) +void SchemaBuilder<Getter, NameMapper>::applyCreateLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info) { if (table_info->isLogicalPartitionTable()) { @@ -1169,7 +1175,7 @@ void SchemaBuilder<Getter, NameMapper>::applyDropPhysicalTable(const String & db } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applyDropTable(DBInfoPtr db_info, TableID table_id) +void SchemaBuilder<Getter, NameMapper>::applyDropTable(const DBInfoPtr & db_info, TableID table_id) { auto & tmt_context = context.getTMTContext(); auto * storage = tmt_context.getStorages().get(table_id).get(); @@ -1193,13 +1199,14 @@ void SchemaBuilder<Getter, NameMapper>::applyDropTable(DBInfoPtr db_info, TableI } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplica(TiDB::DBInfoPtr db_info, TableID table_id) +void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplica(const TiDB::DBInfoPtr & db_info, TableID table_id) { auto latest_table_info = getter.getTableInfo(db_info->id, table_id); if (unlikely(latest_table_info == nullptr)) { throw TiFlashException(fmt::format("miss table in TiKV : {}", table_id), Errors::DDL::StaleSchema); } + auto & tmt_context = context.getTMTContext(); auto storage = tmt_context.getStorages().get(latest_table_info->id); if (unlikely(storage == nullptr)) @@ -1208,18 +1215,37 @@ void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplica(TiDB::DBInfoPtr d Errors::DDL::MissingTable); } - auto managed_storage = std::dynamic_pointer_cast<IManageableStorage>(storage); - if (unlikely(!managed_storage)) - throw Exception(fmt::format("{} is not a ManageableStorage", name_mapper.debugCanonicalName(*db_info, *latest_table_info))); + applySetTiFlashReplicaOnLogicalTable(db_info, latest_table_info, storage); +} - applySetTiFlashReplica(db_info, latest_table_info, managed_storage); +template <typename Getter, typename NameMapper> +void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplicaOnLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage) +{ + applySetTiFlashReplicaOnPhysicalTable(db_info, table_info, storage); + + if (table_info->isLogicalPartitionTable()) + { + auto & tmt_context = context.getTMTContext(); + + for (const auto & part_def : table_info->partition.definitions) + { + auto new_part_table_info = table_info->producePartitionTableInfo(part_def.id, name_mapper); + auto part_storage = tmt_context.getStorages().get(new_part_table_info->id); + if (unlikely(part_storage == nullptr)) + { + throw TiFlashException(fmt::format("miss table in TiFlash : {}", name_mapper.debugCanonicalName(*db_info, *new_part_table_info)), + Errors::DDL::MissingTable); + } + applySetTiFlashReplicaOnPhysicalTable(db_info, new_part_table_info, part_storage); + } + } } template <typename Getter, typename NameMapper> -void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplica( - TiDB::DBInfoPtr db_info, - TiDB::TableInfoPtr latest_table_info, - ManageableStoragePtr storage) +void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplicaOnPhysicalTable( + const TiDB::DBInfoPtr & db_info, + const TiDB::TableInfoPtr & latest_table_info, + const ManageableStoragePtr & storage) { if (storage->getTableInfo().replica_info.count == latest_table_info->replica_info.count) return; @@ -1238,6 +1264,75 @@ void
SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplica( LOG_FMT_INFO(log, "Updated replica info for {}", name_mapper.debugCanonicalName(*db_info, table_info)); } + +template <typename Getter, typename NameMapper> +void SchemaBuilder<Getter, NameMapper>::applySetTiFlashMode(const TiDB::DBInfoPtr & db_info, TableID table_id) +{ + auto latest_table_info = getter.getTableInfo(db_info->id, table_id); + + if (unlikely(latest_table_info == nullptr)) + { + throw TiFlashException(fmt::format("miss table in TiKV : {}", table_id), Errors::DDL::StaleSchema); + } + + auto & tmt_context = context.getTMTContext(); + auto storage = tmt_context.getStorages().get(latest_table_info->id); + if (unlikely(storage == nullptr)) + { + throw TiFlashException(fmt::format("miss table in TiFlash : {}", name_mapper.debugCanonicalName(*db_info, *latest_table_info)), + Errors::DDL::MissingTable); + } + + applySetTiFlashModeOnLogicalTable(db_info, latest_table_info, storage); +} + +template <typename Getter, typename NameMapper> +void SchemaBuilder<Getter, NameMapper>::applySetTiFlashModeOnLogicalTable( + const TiDB::DBInfoPtr & db_info, + const TiDB::TableInfoPtr & table_info, + const ManageableStoragePtr & storage) +{ + applySetTiFlashModeOnPhysicalTable(db_info, table_info, storage); + + if (table_info->isLogicalPartitionTable()) + { + auto & tmt_context = context.getTMTContext(); + for (const auto & part_def : table_info->partition.definitions) + { + auto new_part_table_info = table_info->producePartitionTableInfo(part_def.id, name_mapper); + auto part_storage = tmt_context.getStorages().get(new_part_table_info->id); + if (unlikely(part_storage == nullptr)) + { + throw TiFlashException(fmt::format("miss table in TiFlash : {}", name_mapper.debugCanonicalName(*db_info, *new_part_table_info)), + Errors::DDL::MissingTable); + } + applySetTiFlashModeOnPhysicalTable(db_info, new_part_table_info, part_storage); + } + } +} + + +template <typename Getter, typename NameMapper> +void SchemaBuilder<Getter, NameMapper>::applySetTiFlashModeOnPhysicalTable( + const TiDB::DBInfoPtr & db_info, + const TiDB::TableInfoPtr & latest_table_info, + const ManageableStoragePtr & storage) +{ + if (storage->getTableInfo().tiflash_mode == latest_table_info->tiflash_mode) + return; + + TiDB::TableInfo table_info = storage->getTableInfo(); + table_info.tiflash_mode = latest_table_info->tiflash_mode; + AlterCommands commands; + + LOG_FMT_INFO(log, "Updating tiflash mode for {} to {}", name_mapper.debugCanonicalName(*db_info, table_info), TiFlashModeToString(table_info.tiflash_mode)); + + auto alter_lock = storage->lockForAlter(getThreadName()); + storage->alterFromTiDB(alter_lock, commands, name_mapper.mapDatabaseName(*db_info), table_info, name_mapper, context); + LOG_FMT_INFO(log, "Updated tiflash mode for {} to {}", name_mapper.debugCanonicalName(*db_info, table_info), TiFlashModeToString(table_info.tiflash_mode)); +} + + template <typename Getter, typename NameMapper> void SchemaBuilder<Getter, NameMapper>::syncAllSchema() { @@ -1306,7 +1401,9 @@ void SchemaBuilder<Getter, NameMapper>::syncAllSchema() /// Rename if needed. applyRenameLogicalTable(db, table, storage); /// Update replica info if needed. - applySetTiFlashReplica(db, table, storage); + applySetTiFlashReplicaOnLogicalTable(db, table, storage); + /// Update tiflash mode if needed. + applySetTiFlashModeOnLogicalTable(db, table, storage); /// Alter if needed.
applyAlterLogicalTable(db, table, storage); LOG_FMT_DEBUG(log, "Table {} synced during sync all schemas", name_mapper.debugCanonicalName(*db, *table)); diff --git a/dbms/src/TiDB/Schema/SchemaBuilder.h b/dbms/src/TiDB/Schema/SchemaBuilder.h index 8446765f74a..827203a682f 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder.h +++ b/dbms/src/TiDB/Schema/SchemaBuilder.h @@ -55,39 +55,44 @@ struct SchemaBuilder bool applyCreateSchema(DatabaseID schema_id); - void applyCreateSchema(TiDB::DBInfoPtr db_info); + void applyCreateSchema(const TiDB::DBInfoPtr & db_info); - void applyCreateTable(TiDB::DBInfoPtr db_info, TableID table_id); + void applyCreateTable(const TiDB::DBInfoPtr & db_info, TableID table_id); - void applyCreateLogicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info); + void applyCreateLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info); - void applyCreatePhysicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info); + void applyCreatePhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info); - void applyDropTable(TiDB::DBInfoPtr db_info, TableID table_id); + void applyDropTable(const TiDB::DBInfoPtr & db_info, TableID table_id); /// Parameter schema_name should be mapped. void applyDropPhysicalTable(const String & db_name, TableID table_id); - void applyPartitionDiff(TiDB::DBInfoPtr db_info, TableID table_id); + void applyPartitionDiff(const TiDB::DBInfoPtr & db_info, TableID table_id); - void applyPartitionDiff(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applyPartitionDiff(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); - void applyAlterTable(TiDB::DBInfoPtr db_info, TableID table_id); + void applyAlterTable(const TiDB::DBInfoPtr & db_info, TableID table_id); - void applyAlterLogicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applyAlterLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); - void applyAlterPhysicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applyAlterPhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); - void applyRenameTable(TiDB::DBInfoPtr new_db_info, TiDB::TableID table_id); + void applyRenameTable(const TiDB::DBInfoPtr & new_db_info, TiDB::TableID table_id); - void applyRenameLogicalTable(TiDB::DBInfoPtr new_db_info, TiDB::TableInfoPtr new_table_info, ManageableStoragePtr storage); + void applyRenameLogicalTable(const TiDB::DBInfoPtr & new_db_info, const TiDB::TableInfoPtr & new_table_info, const ManageableStoragePtr & storage); - void applyRenamePhysicalTable(TiDB::DBInfoPtr new_db_info, TiDB::TableInfo & new_table_info, ManageableStoragePtr storage); + void applyRenamePhysicalTable(const TiDB::DBInfoPtr & new_db_info, const TiDB::TableInfo & new_table_info, const ManageableStoragePtr & storage); void applyExchangeTablePartition(const SchemaDiff & diff); - void applySetTiFlashReplica(TiDB::DBInfoPtr db_info, TableID table_id); - void applySetTiFlashReplica(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applySetTiFlashReplica(const TiDB::DBInfoPtr & db_info, TableID table_id); + void applySetTiFlashReplicaOnLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & 
table_info, const ManageableStoragePtr & storage); + void applySetTiFlashReplicaOnPhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); + + void applySetTiFlashMode(const TiDB::DBInfoPtr & db_info, TableID table_id); + void applySetTiFlashModeOnLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); + void applySetTiFlashModeOnPhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); }; } // namespace DB diff --git a/dbms/src/TiDB/Schema/SchemaGetter.cpp b/dbms/src/TiDB/Schema/SchemaGetter.cpp index 7f52f9301b1..6e333d6ba87 100644 --- a/dbms/src/TiDB/Schema/SchemaGetter.cpp +++ b/dbms/src/TiDB/Schema/SchemaGetter.cpp @@ -19,7 +19,6 @@ namespace DB { - namespace ErrorCodes { extern const int SCHEMA_SYNC_ERROR; @@ -188,18 +187,26 @@ Int64 SchemaGetter::getVersion() return std::stoll(ver); } +bool SchemaGetter::checkSchemaDiffExists(Int64 ver) +{ + String key = getSchemaDiffKey(ver); + String data = TxnStructure::get(snap, key); + return !data.empty(); +} + String SchemaGetter::getSchemaDiffKey(Int64 ver) { return std::string(schemaDiffPrefix) + ":" + std::to_string(ver); } -SchemaDiff SchemaGetter::getSchemaDiff(Int64 ver) +std::optional<SchemaDiff> SchemaGetter::getSchemaDiff(Int64 ver) { String key = getSchemaDiffKey(ver); String data = TxnStructure::get(snap, key); if (data.empty()) { - throw TiFlashException("cannot find schema diff for version: " + std::to_string(ver), Errors::Table::SyncError); + LOG_FMT_WARNING(log, "The schema diff for version {}, key {} is empty.", ver, key); + return std::nullopt; } SchemaDiff diff; diff.deserialize(data); diff --git a/dbms/src/TiDB/Schema/SchemaGetter.h b/dbms/src/TiDB/Schema/SchemaGetter.h index 02d2f7a7c88..72fd00678f7 100644 --- a/dbms/src/TiDB/Schema/SchemaGetter.h +++ b/dbms/src/TiDB/Schema/SchemaGetter.h @@ -26,6 +26,8 @@ #include +#include <optional> + namespace DB { // The enum results are completely the same as the DDL Action listed in the "parser/model/ddl.go" of TiDB codebase, which must be keeping in sync. @@ -92,11 +94,14 @@ enum class SchemaActionType : Int8 AlterTableStatsOptions = 58, AlterNoCacheTable = 59, CreateTables = 60, + ActionMultiSchemaChange = 61, + SetTiFlashMode = 62, + // If we support a new type from TiDB, // MaxRecognizedType also needs to be changed. // It should always be equal to the maximum supported type + 1 - MaxRecognizedType = 61, + MaxRecognizedType = 63, }; struct AffectedOption @@ -138,7 +143,9 @@ struct SchemaGetter Int64 getVersion(); - SchemaDiff getSchemaDiff(Int64 ver); + bool checkSchemaDiffExists(Int64 ver); + + std::optional<SchemaDiff> getSchemaDiff(Int64 ver); static String getSchemaDiffKey(Int64 ver); diff --git a/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h index 4fdba195acb..a23aeab139f 100644 --- a/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h +++ b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h @@ -106,21 +106,31 @@ struct TiDBSchemaSyncer : public SchemaSyncer Stopwatch watch; SCOPE_EXIT({ GET_METRIC(tiflash_schema_apply_duration_seconds).Observe(watch.elapsedSeconds()); }); - LOG_FMT_INFO(log, "start to sync schemas. current version is: {} and try to sync schema version to: {}", cur_version, version); + LOG_FMT_INFO(log, "Start to sync schemas. Current version is: {} and try to sync schema version to: {}", cur_version, version); // Show whether the schema mutex is held for a long time or not.
GET_METRIC(tiflash_schema_applying).Set(1.0); SCOPE_EXIT({ GET_METRIC(tiflash_schema_applying).Set(0.0); }); GET_METRIC(tiflash_schema_apply_count, type_diff).Increment(); - if (!tryLoadSchemaDiffs(getter, version, context)) + // After the concurrent-DDL feature, TiDB does `update schema version` before `set schema diff`, and they are done in separate transactions. + // So TiFlash may see a schema version X but no schema diff X, meaning that the transaction of schema diff X has not been committed or has + // been aborted. + // However, TiDB makes sure that if we get a schema version X, then the schema diff X-1 must exist; otherwise, the transaction of schema diff + // X-1 was aborted and we can safely ignore it. + // Since TiDB cannot make sure the schema diff of the latest schema version X is not empty, in this situation we should set `cur_version` + // to X-1 and try to fetch the schema diff X next time. + Int64 version_after_load_diff = 0; + if (version_after_load_diff = tryLoadSchemaDiffs(getter, version, context); version_after_load_diff == -1) { GET_METRIC(tiflash_schema_apply_count, type_full).Increment(); loadAllSchema(getter, version, context); + // After loadAllSchema, we need to update `version_after_load_diff` depending on whether the last schema diff exists. + version_after_load_diff = getter.checkSchemaDiffExists(version) ? version : version - 1; } - cur_version = version; + cur_version = version_after_load_diff; GET_METRIC(tiflash_schema_version).Set(cur_version); - LOG_FMT_INFO(log, "end sync schema, version has been updated to {}", cur_version); + LOG_FMT_INFO(log, "End sync schema, version has been updated to {}{}", cur_version, cur_version == version ? "" : " (latest diff is empty)"); return true; } @@ -144,30 +154,60 @@ struct TiDBSchemaSyncer : public SchemaSyncer return it->second; } - bool tryLoadSchemaDiffs(Getter & getter, Int64 version, Context & context) + // Return values: + // - if the latest schema diff is not empty, return latest_version + // - if the latest schema diff is empty, return latest_version - 1 + // - if an error happened, return -1 + Int64 tryLoadSchemaDiffs(Getter & getter, Int64 latest_version, Context & context) { - if (isTooOldSchema(cur_version, version)) + if (isTooOldSchema(cur_version, latest_version)) { - return false; + return -1; } - LOG_FMT_DEBUG(log, "try load schema diffs."); + LOG_FMT_DEBUG(log, "Try load schema diffs."); - SchemaBuilder<Getter, NameMapper> builder(getter, context, databases, version); + SchemaBuilder<Getter, NameMapper> builder(getter, context, databases, latest_version); Int64 used_version = cur_version; - std::vector<SchemaDiff> diffs; - while (used_version < version) + // First fetch all schema diffs from `cur_version` to `latest_version`. Only apply the schema diffs if all of them + // were fetched without any exception.
+ std::vector<std::optional<SchemaDiff>> diffs; + while (used_version < latest_version) { used_version++; diffs.push_back(getter.getSchemaDiff(used_version)); } - LOG_FMT_DEBUG(log, "end load schema diffs with total {} entries.", diffs.size()); + LOG_FMT_DEBUG(log, "End load schema diffs with total {} entries.", diffs.size()); + try { - for (const auto & diff : diffs) + for (size_t diff_index = 0; diff_index < diffs.size(); ++diff_index) { - builder.applyDiff(diff); + const auto & schema_diff = diffs[diff_index]; + + if (!schema_diff) + { + // If the schema diff of `latest_version` is empty, we do not apply up to `latest_version`, + // but only up to `latest_version - 1`. + // If a schema diff in [`cur_version`, `latest_version - 1`] is empty, we simply skip it. + // + // example: + // - `cur_version` is 1, `latest_version` is 10 + // - The schema diffs of schema versions [2,4,6] are empty, then we just skip them. + // - The schema diff of schema version 10 is empty, then we only apply up to version 9. + if (diff_index != diffs.size() - 1) + { + LOG_FMT_WARNING(log, "Skip the schema diff from version {}. ", cur_version + diff_index + 1); + continue; + } + + // if diff_index == diffs.size() - 1, return used_version - 1; + return used_version - 1; + } + + builder.applyDiff(*schema_diff); } } catch (TiFlashException & e) @@ -177,7 +217,7 @@ struct TiDBSchemaSyncer : public SchemaSyncer GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); } LOG_FMT_WARNING(log, "apply diff meets exception : {} \n stack is {}", e.displayText(), e.getStackTrace().toString()); - return false; + return -1; } catch (Exception & e) { @@ -187,21 +227,22 @@ struct TiDBSchemaSyncer : public SchemaSyncer } GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); LOG_FMT_WARNING(log, "apply diff meets exception : {} \n stack is {}", e.displayText(), e.getStackTrace().toString()); - return false; + return -1; } catch (Poco::Exception & e) { GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); LOG_FMT_WARNING(log, "apply diff meets exception : {}", e.displayText()); - return false; + return -1; } catch (std::exception & e) { GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); LOG_FMT_WARNING(log, "apply diff meets exception : {}", e.what()); - return false; + return -1; } - return true; + + return used_version; } void loadAllSchema(Getter & getter, Int64 version, Context & context) diff --git a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp index 3addf73a642..06253cac66e 100644 --- a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp +++ b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp @@ -69,11 +69,10 @@ try .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .build(context); - executeStreams( - request, - {toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // null input executeStreamsWithSingleSource( request, {}); // nullable - executeStreamsWithSingleSource( - request, -
{toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), {toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), {toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}}), + createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); // string - sql : select *, row_number() over w1 from test2 window w1 as (partition by partition_string order by order_string) request = context @@ -94,20 +91,18 @@ try .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .build(context); - executeStreams( - request, - {toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), - toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // nullable - executeStreamsWithSingleSource( - request, - {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), - toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}, - {toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), - toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}), + createColumns({toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); // float64 - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_float64) request = context @@ -116,20 +111,18 @@ try .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .build(context); - executeStreams( - request, - {toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // nullable - 
executeStreamsWithSingleSource( - request, - {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, - {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}), + createColumns({toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); // datetime - select *, row_number() over w1 from test4 window w1 as (partition by partition_datetime order by order_datetime); request = context @@ -137,22 +130,20 @@ try .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) .build(context); - executeStreamsWithSingleSource( - request, - {toNullableDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), - toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, - {toNullableDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), - toNullableDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}), + createColumns({toNullableDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // nullable - executeStreamsWithSingleSource( - request, - {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), - toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, - {toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", 
"20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), - toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}), + createColumns({toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); // 2 partiton key and 2 order key // sql : select *, row_number() over w1 from test6 window w1 as (partition by partition_int1, partition_int2 order by order_int1,order_int2) @@ -162,41 +153,38 @@ try .window(RowNumber(), {{"order1", false}, {"order2", false}}, {{"partition1", false}, {"partition2", false}}, buildDefaultRowsFrame()) .build(context); - executeStreams( - request, - {toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), - toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), - toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), - toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), - toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), + toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})})); /***** rank, dense_rank *****/ request = context.scan("test_db", "test_table_for_rank").sort({{"partition", false}, {"order", false}}, true).window({Rank(), DenseRank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame{}).build(context); - executeStreams( - request, - {toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), - toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})}); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})})); // nullable - executeStreamsWithSingleSource( - request, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), - toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})}); - - 
executeStreamsWithSingleSource( - request, - {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), - toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})}); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}), + createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})})); + + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource( + request, + {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}), + createColumns({toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})})); } CATCH diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 5fd25c5d238..2bedb312d07 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -198,3 +198,7 @@ if (ARCH_AMD64) src/crc64_sse2_asimd.cpp APPEND COMPILE_FLAGS "-mpclmul") endif() + +if (ARCH_AARCH64 AND ARCH_LINUX) + target_link_libraries (common PUBLIC tiflash-aarch64-string tiflash-aarch64-math) +endif() diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index f899a47ed10..0d72f950add 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1653635389238, + "iteration": 1654217728945, "links": [], "panels": [ { @@ -542,7 +542,14 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/limit/", + "fill": 0, + "nullPointMode": "null", + "color": "#C4162A" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -633,6 +640,13 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "K" + }, + { + "expr": "sum(tiflash_system_current_metric_MemoryCapacity{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "legendFormat": "limit-{{instance}}", + "exemplar": true, + "refId": "L", + "hide": false } ], "thresholds": [], @@ -701,15 +715,15 @@ "hiddenSeries": false, "id": 51, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -728,6 +742,12 @@ "alias": "total", "fill": 0, "lines": false + }, + { + "alias": "/limit/", + "fill": 0, + "nullPointMode": "null", + "color": "#C4162A" } ], "spaceLength": 10, @@ -742,6 +762,13 @@ "legendFormat": "{{instance}}", "refId": "A", "step": 40 + }, + { + "expr": "sum(tiflash_system_current_metric_LogicalCPUCores{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "legendFormat": "limit-{{instance}}", + 
"exemplar": true, + "refId": "B", + "intervalFactor": 1 } ], "thresholds": [], @@ -3878,7 +3905,7 @@ "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 5, "w": 12, "x": 0, "y": 21 @@ -3893,6 +3920,7 @@ "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -3908,38 +3936,27 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/(delta_merge)|(seg_)/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, - "steppedLine": false, + "steppedLine": true, "targets": [ { - "expr": "sum(rate(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"delta_merge|delta_merge_fg|delta_merge_bg_gc|seg_merge|seg_split|seg_split_fg\"}[1m])) by (type)", + "exemplar": true, + "expr": "sum(rate(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (type)", "format": "time_series", "hide": false, - "intervalFactor": 1, + "interval": "", + "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A" - }, - { - "expr": "sum(increase(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"delta_merge|delta_merge_fg|delta_merge_bg_gc|seg_merge|seg_split|seg_split_fg\"}[1m])) by (type)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Tasks OPS", + "title": "Small Internal Tasks OPS", "tooltip": { "shared": true, "sort": 0, @@ -3955,7 +3972,7 @@ }, "yaxes": [ { - "decimals": null, + "decimals": 1, "format": "ops", "label": null, "logBase": 1, @@ -3969,7 +3986,7 @@ "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { @@ -3988,10 +4005,10 @@ "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 5, "w": 12, "x": 12, "y": 21 @@ -4023,58 +4040,233 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "/^.*-delta_merge/", - "yaxis": 2 + "exemplar": false, + "expr": "histogram_quantile(1, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le,type))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max-{{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Small Internal Tasks Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true }, { - "alias": "/^.*-seg_split/", - "yaxis": 2 + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false } ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": 
false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Total number of storage's internal sub tasks", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 130, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], "spaceLength": 10, "stack": false, - "steppedLine": false, + "steppedLine": true, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "exemplar": true, + "expr": "sum(rate(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (type)", "format": "time_series", "hide": false, - "intervalFactor": 1, - "legendFormat": "max-{{type}}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", "refId": "A" - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Large Internal Tasks OPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "99-{{type}}", - "refId": "B" + "decimals": 1, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true }, { - "expr": "histogram_quantile(0.95, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "95-{{type}}", - "refId": "C" - }, + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Duration of storage's internal sub tasks", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 131, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "expr": "histogram_quantile(0.80, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "exemplar": true, + "expr": "histogram_quantile(1, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le,type))", "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "80-{{type}}", - "refId": "D" + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max-{{type}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Tasks Duration", + "title": "Large Internal Tasks Duration", "tooltip": { "shared": true, "sort": 0, @@ -4090,6 +4282,7 @@ }, "yaxes": [ { + "decimals": 1, "format": "s", "label": null, "logBase": 1, @@ -4103,7 +4296,7 @@ "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { @@ -4128,7 +4321,7 @@ "h": 8, "w": 12, "x": 0, - "y": 29 + "y": 31 }, "hiddenSeries": false, "id": 43, @@ -4234,7 +4427,7 @@ "h": 8, "w": 12, "x": 12, - "y": 29 + "y": 31 }, "heatmap": {}, "hideZeroBuckets": true, @@ -4297,7 +4490,7 @@ "h": 8, "w": 12, "x": 0, - "y": 37 + "y": 39 }, "hiddenSeries": false, "id": 46, @@ -4420,7 +4613,7 @@ "h": 8, "w": 12, "x": 12, - "y": 37 + "y": 39 }, "hiddenSeries": false, "id": 47, @@ -4544,7 +4737,7 @@ "h": 8, "w": 12, "x": 0, - "y": 45 + "y": 47 }, "height": "", "hiddenSeries": false, @@ -4674,7 +4867,7 @@ "h": 8, "w": 12, "x": 12, - "y": 45 + "y": 47 }, "height": "", "hiddenSeries": false, @@ -4802,7 +4995,7 @@ "h": 8, "w": 12, "x": 0, - "y": 53 + "y": 55 }, "hiddenSeries": false, "id": 88, @@ -5002,7 +5195,7 @@ "h": 8, "w": 12, "x": 12, - "y": 53 + "y": 55 }, "hiddenSeries": false, "id": 67, @@ -5116,7 +5309,7 @@ "h": 8, "w": 12, "x": 0, - "y": 61 + "y": 63 }, "hiddenSeries": false, "id": 84, @@ -5216,7 +5409,7 @@ "h": 8, "w": 12, "x": 12, - "y": 61 + "y": 63 }, "hiddenSeries": false, "id": 86, @@ -8183,5 +8376,5 @@ "timezone": "", "title": "Test-Cluster-TiFlash-Summary", "uid": "SVbh2xUWk", - "version": 2 -} + "version": 1 +} \ No newline at end of file diff --git a/release-centos7-llvm/Makefile b/release-centos7-llvm/Makefile index 1b15df7ddc3..9c1bba42a53 100644 --- a/release-centos7-llvm/Makefile +++ b/release-centos7-llvm/Makefile @@ -23,6 +23,10 @@ image_tiflash_llvm_base_aarch64: build_tiflash_release_amd64: docker run --rm -v $(realpath ..):/build/tics hub.pingcap.net/tiflash/tiflash-llvm-base:amd64 /build/tics/release-centos7-llvm/scripts/build-release.sh +# Add build_tiflash_debug_amd64 target to enable FailPoints on x86. Since outputs are the same as release version, no new package targets added. 
+build_tiflash_debug_amd64:
+	docker run --rm -v $(realpath ..):/build/tics hub.pingcap.net/tiflash/tiflash-llvm-base:amd64 /build/tics/release-centos7-llvm/scripts/build-debug.sh
+
 build_tiflash_ci_amd64:
 	docker run --rm -v $(realpath ..):/build/tics hub.pingcap.net/tiflash/tiflash-llvm-base:amd64 /build/tics/release-centos7-llvm/scripts/build-tiflash-ci.sh
diff --git a/release-centos7-llvm/scripts/build-debug.sh b/release-centos7-llvm/scripts/build-debug.sh
new file mode 100755
index 00000000000..59dc9b86a54
--- /dev/null
+++ b/release-centos7-llvm/scripts/build-debug.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Copyright 2022 PingCAP, Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+CMAKE_PREFIX_PATH=$1
+
+set -ueox pipefail
+
+SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
+
+${SCRIPTPATH}/build-tiflash-release.sh "DEBUG" "${CMAKE_PREFIX_PATH}"
diff --git a/release-centos7-llvm/scripts/build-tiflash-release.sh b/release-centos7-llvm/scripts/build-tiflash-release.sh
index 42993b51afe..01ca00e8706 100755
--- a/release-centos7-llvm/scripts/build-tiflash-release.sh
+++ b/release-centos7-llvm/scripts/build-tiflash-release.sh
@@ -47,7 +47,13 @@ ENABLE_PCH=${ENABLE_PCH:-ON}
 
 INSTALL_DIR="${SRCPATH}/release-centos7-llvm/tiflash"
 rm -rf ${INSTALL_DIR} && mkdir -p ${INSTALL_DIR}
-BUILD_DIR="${SRCPATH}/release-centos7-llvm/build-release"
+if [ "$CMAKE_BUILD_TYPE" == "RELWITHDEBINFO" ]; then
+  BUILD_DIR="$SRCPATH/release-centos7-llvm/build-release"
+  ENABLE_FAILPOINTS="OFF"
+else
+  BUILD_DIR="$SRCPATH/release-centos7-llvm/build-debug"
+  ENABLE_FAILPOINTS="ON"
+fi
 rm -rf ${BUILD_DIR} && mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
 
 cmake -S "${SRCPATH}" \
@@ -55,6 +61,7 @@ cmake -S "${SRCPATH}" \
     -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
     -DENABLE_TESTING=OFF \
     -DENABLE_TESTS=OFF \
+    -DENABLE_FAILPOINTS=${ENABLE_FAILPOINTS} \
     -Wno-dev \
     -DUSE_CCACHE=OFF \
     -DRUN_HAVE_STD_REGEX=0 \
diff --git a/tests/fullstack-test-dt/clustered_index/ddl.test b/tests/fullstack-test-dt/clustered_index/ddl.test
index 8abe450c11a..6c4925c9619 100644
--- a/tests/fullstack-test-dt/clustered_index/ddl.test
+++ b/tests/fullstack-test-dt/clustered_index/ddl.test
@@ -66,3 +66,89 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t_2
 
 mysql> drop table test.t_1;
 mysql> drop table test.t_2;
+
+### about issue 5154: check whether add column/drop column affects clustered index decoding
+### drop a column between two columns that are clustered index columns
+
+mysql> drop table if exists test.t_3;
+mysql> create table test.t_3 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED);
+mysql> insert into test.t_3 values (1,'1',1,1),(2,'2',2,2);
+
+mysql> alter table test.t_3 set tiflash replica 1;
+
+func> wait_table test t_3
+
+mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3;
++---+---+---+---+
+| A | B | C | D |
++---+---+---+---+
+| 1 | 1 | 1 | 1 |
+| 2 | 2 | 2 | 2 |
++---+---+---+---+
+
+mysql> alter table test.t_3 drop column B;
+
+mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3;
++---+---+---+
+| A | C | D |
++---+---+---+
+| 1 | 1 | 1 |
+| 2 | 2 | 2 |
++---+---+---+
+
+# insert some rows
+mysql> insert into test.t_3 values (3,3,3),(4,4,4);
+
+mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3;
++---+---+---+
+| A | C | D |
++---+---+---+
+| 1 | 1 | 1 |
+| 2 | 2 | 2 |
+| 3 | 3 | 3 |
+| 4 | 4 | 4 |
++---+---+---+
+
+mysql> drop table test.t_3;
+
+### add a column between two columns that are clustered index columns
+mysql> drop table if exists test.t_4;
+mysql> create table test.t_4 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED);
+
+mysql> insert into test.t_4 values (1,'1',1,1),(2,'2',2,2);
+
+mysql> alter table test.t_4 set tiflash replica 1;
+
+func> wait_table test t_4
+
+mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4;
++---+---+---+---+
+| A | B | C | D |
++---+---+---+---+
+| 1 | 1 | 1 | 1 |
+| 2 | 2 | 2 | 2 |
++---+---+---+---+
+
+mysql> alter table test.t_4 add column E int after B;
+
+mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4;
++---+---+------+---+---+
+| A | B | E    | C | D |
++---+---+------+---+---+
+| 1 | 1 | NULL | 1 | 1 |
+| 2 | 2 | NULL | 2 | 2 |
++---+---+------+---+---+
+
+mysql> insert into test.t_4 values (3,'3',3,3,3),(4,'4',4,4,4);
+
+mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4;
++---+---+------+------+------+
+| A | B | E    | C    | D    |
++---+---+------+------+------+
+| 1 | 1 | NULL | 1    | 1    |
+| 2 | 2 | NULL | 2    | 2    |
+| 3 | 3 | 3    | 3    | 3    |
+| 4 | 4 | 4    | 4    | 4    |
++---+---+------+------+------+
+
+mysql> drop table test.t_4;
\ No newline at end of file
diff --git a/tests/fullstack-test/expr/bitshift_operator.test b/tests/fullstack-test/expr/bitshift_operator.test
new file mode 100644
index 00000000000..0d55a1b56a9
--- /dev/null
+++ b/tests/fullstack-test/expr/bitshift_operator.test
@@ -0,0 +1,43 @@
+# Copyright 2022 PingCAP, Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
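+
+# Exercise the right-shift (>>) operator pushed down to TiFlash. MySQL evaluates bit
+# shifts on unsigned 64-bit integers, so -1 >> 0 yields 18446744073709551615 and any
+# shift count >= 64 yields 0; the expectations below assume TiFlash matches that behavior.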
+ +mysql> drop table if exists test.t; +mysql> create table test.t (a int); +mysql> alter table test.t set tiflash replica 1; +mysql> insert into test.t values(-1); + +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set @@session.tidb_isolation_read_engines = "tiflash"; select a>>0 as v1, a>>64 as v2, a>>10 as v3 from test.t; ++----------------------+------+-------------------+ +| v1 | v2 | v3 | ++----------------------+------+-------------------+ +| 18446744073709551615 | 0 | 18014398509481983 | ++----------------------+------+-------------------+ + +mysql> set tidb_enforce_mpp=1; set @@session.tidb_isolation_read_engines = "tiflash"; select a from test.t where a>>100000=0; ++------+ +| a | ++------+ +| -1 | ++------+ + +mysql> set tidb_enforce_mpp=1; set @@session.tidb_isolation_read_engines = "tiflash"; select a from test.t where a>>63=1; ++------+ +| a | ++------+ +| -1 | ++------+ + +mysql> drop table if exists test.t diff --git a/tests/fullstack-test/expr/duration_pushdown.test b/tests/fullstack-test/expr/duration_pushdown.test index 63106fa1788..442a708a802 100644 --- a/tests/fullstack-test/expr/duration_pushdown.test +++ b/tests/fullstack-test/expr/duration_pushdown.test @@ -106,6 +106,14 @@ mysql> use test; set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflas # | 123500 | # +----------------+ +mysql> use test; set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select time_to_sec(a) from t; ++----------------+ +| time_to_sec(a) | ++----------------+ +| 2520610 | +| -2520610 | ++----------------+ + mysql> drop table if exists test.time_test; mysql> create table test.time_test(id int(11),v1 time(3) not null, v2 time(3)); diff --git a/tests/fullstack-test/expr/format.test b/tests/fullstack-test/expr/format.test index 8cea75d6914..719e30c974d 100644 --- a/tests/fullstack-test/expr/format.test +++ b/tests/fullstack-test/expr/format.test @@ -44,3 +44,52 @@ int_val 1,234.000 mysql> drop table if exists test.t + +mysql> create table test.t(id int, value decimal(65,4)) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values(1,9999999999999999999999999999999999999999999999999999999999999.9999) + +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,-3) as result from test.t +result +10,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,0) as result from test.t +result +10,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,3) as result from test.t +result +10,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000.000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,10) as result from test.t +result +9,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999.9999000000 + + +mysql> drop table if exists test.t + +mysql> create table test.t(id int, value decimal(7,4)) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values(1,999.9999) + +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,-2) as result from test.t +result +1,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,0) as 
result from test.t +result +1,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,2) as result from test.t +result +1,000.00 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,10) as result from test.t +result +999.9999000000 + +mysql> drop table if exists test.t diff --git a/tests/fullstack-test/expr/get_format.test b/tests/fullstack-test/expr/get_format.test new file mode 100644 index 00000000000..5409302c10a --- /dev/null +++ b/tests/fullstack-test/expr/get_format.test @@ -0,0 +1,60 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t; +mysql> create table test.t(location varchar(10)); +mysql> insert into test.t values('USA'), ('JIS'), ('ISO'), ('EUR'), ('INTERNAL'); +mysql> alter table test.t set tiflash replica 1; +func> wait_table test t +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(DATE, location) from test.t; ++----------------------------+ +| GET_FORMAT(DATE, location) | ++----------------------------+ +| %m.%d.%Y | +| %Y-%m-%d | +| %Y-%m-%d | +| %d.%m.%Y | +| %Y%m%d | ++----------------------------+ +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(DATETIME, location) from test.t; ++--------------------------------+ +| GET_FORMAT(DATETIME, location) | ++--------------------------------+ +| %Y-%m-%d %H.%i.%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H.%i.%s | +| %Y%m%d%H%i%s | ++--------------------------------+ +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(TIMESTAMP, location) from test.t; ++---------------------------------+ +| GET_FORMAT(TIMESTAMP, location) | ++---------------------------------+ +| %Y-%m-%d %H.%i.%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H.%i.%s | +| %Y%m%d%H%i%s | ++---------------------------------+ +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(TIME, location) from test.t; ++----------------------------+ +| GET_FORMAT(TIME, location) | ++----------------------------+ +| %h:%i:%s %p | +| %H:%i:%s | +| %H:%i:%s | +| %H.%i.%s | +| %H%i%s | ++----------------------------+ +mysql> drop table if exists test.t; diff --git a/tests/fullstack-test/expr/reverse.test b/tests/fullstack-test/expr/reverse.test new file mode 100644 index 00000000000..9195adf2b7d --- /dev/null +++ b/tests/fullstack-test/expr/reverse.test @@ -0,0 +1,44 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t; +mysql> create table if not exists test.t(a varchar(256)); + + +mysql> insert into test.t values('one week’s time test'); +mysql> insert into test.t values('abc测试def'); +mysql> insert into test.t values('abcテストabc'); +mysql> insert into test.t values('ѐёђѓєѕіїјљњћќѝўџ'); +mysql> insert into test.t values('+ѐ-ё*ђ/ѓ!є@ѕ#і@ї%ј……љ&њ(ћ)ќ¥ѝ#ў@џ!^'); +mysql> insert into test.t values('αβγδεζηθικλμνξοπρστυφχψωσ'); +mysql> insert into test.t values('▲α▼βγ➨δε☎ζη✂θι€κλ♫μν✓ξο✚πρ℉στ♥υφ♖χψ♘ω★σ✕'); +mysql> insert into test.t values('թփձջրչճժծքոեռտըւիօպասդֆգհյկլխզղցվբնմշ'); +mysql> insert into test.t values(NULL); +mysql> alter table test.t set tiflash replica 1; +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select reverse(a) from test.t; ++-------------------------------------------------------------------------------------------------+ +| reverse(a) | ++-------------------------------------------------------------------------------------------------+ +| tset emit s’keew eno | +| fed试测cba | +| cbaトステcba | +| џўѝќћњљјїіѕєѓђёѐ | +| ^!џ@ў#ѝ¥ќ)ћ(њ&љ……ј%ї@і#ѕ@є!ѓ/ђ*ё-ѐ+ | +| σωψχφυτσρποξνμλκιθηζεδγβα | +| ✕σ★ω♘ψχ♖φυ♥τσ℉ρπ✚οξ✓νμ♫λκ€ιθ✂ηζ☎εδ➨γβ▼α▲ | +| շմնբվցղզխլկյհգֆդսապօիւըտռեոքծժճչրջձփթ | +| NULL | ++-------------------------------------------------------------------------------------------------+ diff --git a/tests/fullstack-test/mpp/issue_2471.test b/tests/fullstack-test/mpp/issue_2471.test index 4a1528595e8..9966eaadec3 100644 --- a/tests/fullstack-test/mpp/issue_2471.test +++ b/tests/fullstack-test/mpp/issue_2471.test @@ -35,7 +35,7 @@ mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_opt_bro => DBGInvoke __enable_fail_point(exception_in_creating_set_input_stream) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_opt_broadcast_cartesian_join=2; select * from a as t1 left join a as t2 on t1.id = t2.id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_in_creating_set_input_stream is triggered. 
+ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_in_creating_set_input_stream is triggered., e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_in_creating_set_input_stream) diff --git a/tests/fullstack-test/mpp/mpp_fail.test b/tests/fullstack-test/mpp/mpp_fail.test index 7af5fef3f89..0e272c0b621 100644 --- a/tests/fullstack-test/mpp/mpp_fail.test +++ b/tests/fullstack-test/mpp/mpp_fail.test @@ -71,20 +71,20 @@ ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_be ## exception during mpp run non root task => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered., e.what() = DB::Exception,, e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) ## exception during mpp run root task => DBGInvoke __enable_fail_point(exception_during_mpp_root_task_run) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered., e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_root_task_run) ## exception during mpp write err to tunnel => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __enable_fail_point(exception_during_mpp_write_err_to_tunnel) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : Failed to write error msg to tunnel +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Failed to write error msg to tunnel, e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __disable_fail_point(exception_during_mpp_write_err_to_tunnel) @@ -92,7 +92,7 @@ ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchang => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __enable_fail_point(exception_during_mpp_close_tunnel) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. 
+ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered., e.what() = DB::Exception,, e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __disable_fail_point(exception_during_mpp_close_tunnel) @@ -125,7 +125,7 @@ ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchang ## ensure build1, build2-probe1, probe2 in the CreatingSets, test the bug where build1 throw exception but not change the build state, thus block the build2-probe1, at last this query hangs. => DBGInvoke __enable_fail_point(exception_mpp_hash_build) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; set @@tidb_broadcast_join_threshold_count=0; set @@tidb_broadcast_join_threshold_size=0; select t1.id from test.t t1 join test.t t2 on t1.id = t2.id and t1.id <2 join (select id from test.t group by id) t3 on t2.id=t3.id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_mpp_hash_build is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_mpp_hash_build is triggered., e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_mpp_hash_build) # Clean up. diff --git a/tests/fullstack-test/mpp/window.test b/tests/fullstack-test/mpp/window.test new file mode 100644 index 00000000000..698d39ef2ea --- /dev/null +++ b/tests/fullstack-test/mpp/window.test @@ -0,0 +1,32 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t1; +mysql> create table test.t1(c1 int, c2 int); +mysql> insert into test.t1 values(1, 1),(2, 2),(3, 3),(1, 1),(2, 2),(3, 3),(4, 4); +mysql> alter table test.t1 set tiflash replica 1; +func> wait_table test t1 +mysql> use test; set @@tidb_isolation_read_engines='tiflash'; select c1, c2, row_number() over w2, row_number() over w1 from test.t1 window w1 as(partition by c1), w2 as (partition by c1, c2) order by 1, 2, 3, 4; ++------+------+----------------------+----------------------+ +| c1 | c2 | row_number() over w2 | row_number() over w1 | ++------+------+----------------------+----------------------+ +| 1 | 1 | 1 | 1 | +| 1 | 1 | 2 | 2 | +| 2 | 2 | 1 | 1 | +| 2 | 2 | 2 | 2 | +| 3 | 3 | 1 | 1 | +| 3 | 3 | 2 | 2 | +| 4 | 4 | 1 | 1 | ++------+------+----------------------+----------------------+ +mysql> drop table if exists test.t1; diff --git a/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test b/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test new file mode 100644 index 00000000000..5e43936379b --- /dev/null +++ b/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test @@ -0,0 +1,89 @@ +# Copyright 2022 PingCAP, Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# test tiflash replica for normal case +mysql> drop table if exists test.t +mysql> create table test.t(a int) +mysql> alter table test.t set tiflash replica 1 + +func> wait_table test t + +>> DBGInvoke get_tiflash_replica_count("test", "t") +┌─get_tiflash_replica_count(test, t)─┐ +│ 1 │ +└────────────────────────────────────┘ + +# test tiflash mode in normal mode +>> DBGInvoke get_tiflash_mode("test", "t") +┌─get_tiflash_mode(test, t)─┐ +│ │ +└───────────────────────────┘ + +mysql> alter table test.t set tiflash mode fast + +>> DBGInvoke __refresh_schemas() + +# test tiflash mode in fast mode +>> DBGInvoke get_tiflash_mode("test", "t") +┌─get_tiflash_mode(test, t)───┐ +│ fast │ +└─────────────────────────────┘ + +# test replica for partition tables +mysql> drop table if exists test.t +mysql> create table test.t (x int) partition by range (x) (partition p0 values less than (5), partition p1 values less than (10)); +mysql> alter table test.t set tiflash mode fast +mysql> alter table test.t set tiflash replica 1 + +func> wait_table test t + +>> DBGInvoke get_tiflash_replica_count("test", "t") +┌─get_tiflash_replica_count(test, t)─┐ +│ 1 │ +└────────────────────────────────────┘ + +>> DBGInvoke get_tiflash_mode("test", "t") +┌─get_tiflash_mode(test, t)──────────┐ +│ fast │ +└────────────────────────────────────┘ + +>> DBGInvoke get_partition_tables_tiflash_replica_count("test", "t") +┌─get_partition_tables_tiflash_replica_count(test, t)─┐ +│ 1/1/ │ +└─────────────────────────────────────────────────────┘ + +# test tiflash mode for partition tables +>> DBGInvoke get_partition_tables_tiflash_mode("test", "t") +┌─get_partition_tables_tiflash_mode(test, t)─┐ +│ fast/fast/ │ +└────────────────────────────────────────────┘ + +# test replica for add partition tables after set replica +mysql> alter table test.t add partition (partition p2 values less than (2010)); + +>> DBGInvoke __refresh_schemas() + +>> DBGInvoke get_partition_tables_tiflash_replica_count("test", "t") +┌─get_partition_tables_tiflash_replica_count(test, t)─┐ +│ 1/1/1/ │ +└─────────────────────────────────────────────────────┘ + +# test tiflash mode for add partition tables after set replica +>> DBGInvoke get_partition_tables_tiflash_mode("test", "t") +┌─get_partition_tables_tiflash_mode(test, t)─┐ +│ fast/fast/fast/ │ +└────────────────────────────────────────────┘ + + + diff --git a/tests/fullstack-test2/ddl/alter_tiflash_mode.test b/tests/fullstack-test2/ddl/alter_tiflash_mode.test new file mode 100644 index 00000000000..c9f3ef488c4 --- /dev/null +++ b/tests/fullstack-test2/ddl/alter_tiflash_mode.test @@ -0,0 +1,48 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t +mysql> create table test.t(a int, b int) +mysql> alter table test.t set tiflash replica 1 + +func> wait_table test t + +# check default mode of tiflash table +mysql> select table_schema,table_name,replica_count,available,table_mode from information_schema.tiflash_replica where table_schema='test' and table_name='t'; ++--------------+------------+---------------+-----------+-----------+ +| table_schema | table_name | replica_count | available | table_mode| ++--------------+------------+---------------+-----------+-----------+ +| test | t | 1 | 1 | NORMAL | ++--------------+------------+---------------+-----------+-----------+ + +# check change mode + +mysql> alter table test.t set tiflash mode fast +mysql> select table_schema,table_name,replica_count,available,table_mode from information_schema.tiflash_replica where table_schema='test' and table_name='t'; ++--------------+------------+---------------+-----------+-----------+ +| table_schema | table_name | replica_count | available | table_mode| ++--------------+------------+---------------+-----------+-----------+ +| test | t | 1 | 1 | FAST | ++--------------+------------+---------------+-----------+-----------+ + +# check change mode +mysql> alter table test.t set tiflash mode normal +mysql> select table_schema,table_name,replica_count,available,table_mode from information_schema.tiflash_replica where table_schema='test' and table_name='t'; ++--------------+------------+---------------+-----------+-----------+ +| table_schema | table_name | replica_count | available | table_mode| ++--------------+------------+---------------+-----------+-----------+ +| test | t | 1 | 1 | NORMAL | ++--------------+------------+---------------+-----------+-----------+ + +mysql> drop table if exists test.t \ No newline at end of file diff --git a/tests/fullstack-test2/ddl/multi_alter_with_write.test b/tests/fullstack-test2/ddl/multi_alter_with_write.test new file mode 100644 index 00000000000..3284511d775 --- /dev/null +++ b/tests/fullstack-test2/ddl/multi_alter_with_write.test @@ -0,0 +1,880 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# this test focus on the case when multi DDL actions happen closely +#( and these DDL actions will be fetched in the same regular sync schema duration.) +# and there are some corresponding insert(write) actions between these DDL actions. 
+# Considering that these write actions and these schema change will arrive at +# tiflash in a different order, we simulate these different order situation to check +# that our schema module was working correctly. + +# TiDB Timeline : write cmd 1 | alter cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 + +# stop regular schema sync +=> DBGInvoke __enable_schema_sync_service('false') + +# Enable the failpoint and make it pause before applying the raft cmd to write a row +>> DBGInvoke __init_fail_point() +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# Enable the failpoint to make our query only start when the write action finished +>> DBGInvoke __enable_fail_point(unblock_query_init_after_write) + +# ----------------------------------------------------------------------------- +# Order 1 : write cmd 1 | alter cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 +# ----------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# --------------------------------------------------------------------------------------------- +# Order 2 : write cmd 1 | alter cmd 1 | write cmd 2 | 
write cmd 3 --> sync schema(alter cmd 2) +# --------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 3 --> call sync schema and get alter cmd 2 happen +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 3 : write cmd 1 | alter cmd 1 | alter cmd 2 | write cmd 2 -->sync schema() | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 
take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 4 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1) | alter cmd 2 | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# check no schema change before write cmd 2 take effect +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 2 --> should call sync schema, get the alter cmd 1 happened. 
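+# (write cmd 2 carries a value for the new column e that the local TiFlash schema does
+# not know yet, which is presumably what forces the schema sync before decoding)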
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# --------------------------------------------------------------------------------------------------------------------- +# Order 5 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1) | write cmd 3 --> sync schema(alter cmd 2) +# ---------------------------------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 2 --> should call sync schema, get the alter cmd 1 happened. +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 3 --> should call sync schema, get the alter cmd 2 happened. 
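+# (likewise, write cmd 3 has no value for the dropped column b, so applying it forces
+# another schema sync, which picks up alter cmd 2)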
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 6 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1 alter cmd 2) | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 2 --> should call sync schema, get the alter cmd 1 && alter cmd 2 happened. +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ------------------------------------------------------------------------------- +# Order 7 : alter cmd 1 | write cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 +# ------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
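+# (with one row already present, the \N shown later in the new column e confirms that
+# alter cmd 1 reached existing data)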
+mysql> insert into test.t (a, b, c) values (0, 0, ' '); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 0 │ 0.00 │ │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# -------------------------------------------------------------------------------------------------- +# Order 8 : alter cmd 1 | write cmd 1 | write cmd 2 | write cmd 3 --> sync schema(alter cmd 2) +# -------------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
+mysql> insert into test.t (a, b, c) values (0, 0, ' '); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 0 │ 0.00 │ │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 3 --> should call sync schema, get the alter cmd 2 happened. +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# -------------------------------------------------------------------------------------------------- +# Order 9 : alter cmd 1 | write cmd 1 | alter cmd 2 | write cmd 2 -->sync schema() | write cmd 3 +# -------------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
+mysql> insert into test.t (a, b, c) values (0, 0, ' '); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 0 │ 0.00 │ │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 0 │ 0.00 │ │ 0 │ \N │ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 0 │ │ 0 │ \N │ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ------------------------------------------------------------------------------------------------------------------ +# Order 10 : alter cmd 1 | alter cmd 2 | write cmd 1 -->sync schema() | write cmd 2 -->sync schema() | write cmd 3 +# ------------------------------------------------------------------------------------------------------------------ + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# add a new pre write to make check the alter cmd 1 more convenient. 
+mysql> insert into test.t (a, b, c) values (0, 0, ' ');
+
+# enable pause_query_init so that queries cannot start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┐
+│   0 │ 0.00 │       │   0 │
+└─────┴──────┴───────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 1
+mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc');
+
+# alter cmd 1
+mysql> alter table test.t add column e decimal(6,1) NULL;
+
+# make alter cmd 1 take effect
+>> DBGInvoke __refresh_schemas()
+
+# write cmd 2
+mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1);
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# make alter cmd 2 take effect
+>> DBGInvoke __refresh_schemas()
+
+# enable pause_query_init so that queries cannot start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 1 and write cmd 2 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│   0 │       │   0 │  \N │
+│   1 │ abc   │   0 │  \N │
+│   3 │ ccc   │   3 │ 0.1 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries cannot start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│   0 │       │   0 │  \N │
+│   1 │ abc   │   0 │  \N │
+│   3 │ ccc   │   3 │ 0.1 │
+│   4 │ abcd  │  10 │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+
+##
+
+=> DBGInvoke __enable_schema_sync_service('true')
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+>> DBGInvoke __disable_fail_point(unblock_query_init_after_write)
+>> DBGInvoke __disable_fail_point(pause_query_init)
\ No newline at end of file
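Taken together, these orderings pin down one invariant: a Raft write whose row layout no longer matches TiFlash's local schema must trigger a schema sync before it is applied, which is why write cmd 3 can observe alter cmd 2 even without an explicit `__refresh_schemas()`. Below is a toy Python model of that decode-then-sync retry; the class and the bare column-count check are illustrative assumptions, not TiFlash's actual decoding logic.

```python
# Toy model only: TiFlash's real decoder works on encoded Raft rows and
# schema versions; here a column-count mismatch stands in for
# "this write cannot be decoded with the local schema".

class LocalTable:
    def __init__(self, columns):
        self.columns = list(columns)           # TiFlash-side schema copy

    def sync_schema(self, upstream_columns):
        self.columns = list(upstream_columns)  # pull the latest schema from TiDB

    def apply_write(self, row, upstream_columns):
        if len(row) != len(self.columns):
            # decode failed -> sync schema, then retry the decode once
            self.sync_schema(upstream_columns)
        assert len(row) == len(self.columns), "write still undecodable"
        return dict(zip(self.columns, row))

local = LocalTable(['a', 'b', 'c', 'd', 'e'])
tidb = ['a', 'c', 'd', 'e']                    # alter cmd 2 already dropped b upstream
print(local.apply_write([4, 'abcd', 10, 0.2], tidb))
# {'a': 4, 'c': 'abcd', 'd': 10, 'e': 0.2}
```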
diff --git a/tests/run-test.py b/tests/run-test.py
index 843fe7c79b4..a2bcee0ce99 100644
--- a/tests/run-test.py
+++ b/tests/run-test.py
@@ -29,6 +29,7 @@
 UNFINISHED_1_PREFIX = '\t'
 UNFINISHED_2_PREFIX = '  '
 WORD_PH = '{#WORD}'
+LINE_PH = '{#LINE}'
 CURL_TIDB_STATUS_PREFIX = 'curl_tidb> '
 
 verbose = False
@@ -138,18 +139,22 @@ def match_ph_word(line):
 
 # TODO: Support more place holders, eg: {#NUMBER}
 def compare_line(line, template):
-    while True:
-        i = template.find(WORD_PH)
-        if i < 0:
-            return line == template
-        else:
-            if line[:i] != template[:i]:
-                return False
-            j = match_ph_word(line[i:])
-            if j == 0:
-                return False
-            template = template[i + len(WORD_PH):]
-            line = line[i + j:]
+    l = template.find(LINE_PH)
+    if l >= 0:
+        return True
+    else:
+        while True:
+            i = template.find(WORD_PH)
+            if i < 0:
+                return line == template
+            else:
+                if line[:i] != template[:i]:
+                    return False
+                j = match_ph_word(line[i:])
+                if j == 0:
+                    return False
+                template = template[i + len(WORD_PH):]
+                line = line[i + j:]
 
 
 class MySQLCompare:
@@ -194,11 +199,14 @@ def matched(outputs, matches):
             b = MySQLCompare.parse_excepted_outputs(matches)
             return a == b
         else:
-            if len(outputs) != len(matches):
+            if len(outputs) > len(matches):
                 return False
             for i in range(0, len(outputs)):
                 if not compare_line(outputs[i], matches[i]):
                     return False
+            for i in range(len(outputs), len(matches)):
+                if not compare_line("", matches[i]):
+                    return False
             return True
 
 
@@ -212,11 +220,14 @@ def matched(outputs, matches, fuzz):
             b = parse_table_parts(matches, fuzz)
             return a == b
         else:
-            if len(outputs) != len(matches):
+            if len(outputs) > len(matches):
                 return False
             for i in range(0, len(outputs)):
                 if not compare_line(outputs[i], matches[i]):
                     return False
+            for i in range(len(outputs), len(matches)):
+                if not compare_line("", matches[i]):
+                    return False
             return True
 
 
diff --git a/tests/sanitize/tsan.suppression b/tests/sanitize/tsan.suppression
new file mode 100644
index 00000000000..73824caa2b9
--- /dev/null
+++ b/tests/sanitize/tsan.suppression
@@ -0,0 +1 @@
+race:dbms/src/Common/TiFlashMetrics.h
diff --git a/tests/tidb-ci/new_collation_fullstack/expr.test b/tests/tidb-ci/new_collation_fullstack/expr.test
index 15ada0f335c..1e2135c4f2d 100644
--- a/tests/tidb-ci/new_collation_fullstack/expr.test
+++ b/tests/tidb-ci/new_collation_fullstack/expr.test
@@ -35,6 +35,13 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_s
 |    2 | abc   |
 +------+-------+
 
+mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 = 'abc ';
++------+-------+
+| id   | value1|
++------+-------+
+|    1 | abc   |
+|    2 | abc   |
++------+-------+
 
 mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value from test.t where value like 'aB%';
 +------+-------+
@@ -62,6 +69,13 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_s
 |    3 | def   |
 +------+-------+
 
+mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 = 'def ';
++------+-------+
+| id   | value1|
++------+-------+
+|    3 | def   |
++------+-------+
+
 mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 in ('Abc','def');
 +------+-------+
 | id   | value1|
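The run-test.py hunks above introduce a `{#LINE}` placeholder next to the existing `{#WORD}`, and relax `matched` so the expected block may contain more template lines than actual output, provided each surplus template accepts an empty line. Below is a self-contained sketch of those semantics; `match_ph_word` here is a simplified stand-in for the real helper, which is not shown in this diff.

```python
WORD_PH = '{#WORD}'
LINE_PH = '{#LINE}'


def match_ph_word(line):
    # Simplified stand-in: the length of the leading non-space token;
    # 0 means {#WORD} has nothing to consume, i.e. no match.
    n = 0
    while n < len(line) and not line[n].isspace():
        n += 1
    return n


def compare_line(line, template):
    # As in the patched compare_line: a template containing {#LINE}
    # accepts any actual line, including a missing (empty) one.
    if template.find(LINE_PH) >= 0:
        return True
    while True:
        i = template.find(WORD_PH)
        if i < 0:
            return line == template
        if line[:i] != template[:i]:
            return False
        j = match_ph_word(line[i:])
        if j == 0:
            return False
        template = template[i + len(WORD_PH):]
        line = line[i + j:]


assert compare_line('Inserted 42 rows', 'Inserted {#WORD} rows')
assert not compare_line('Inserted  rows', 'Inserted {#WORD} rows')  # nothing to consume
assert compare_line('anything at all', '{#LINE}')
assert compare_line('', '{#LINE}')  # why matched() can probe surplus templates with ""
```

The first `{#LINE}` in a template short-circuits the whole comparison, which is what lets the relaxed `matched` walk the trailing, output-less templates with an empty string and still succeed.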