From 75b7b6012019c1e512f7f42b0eb138cc731ec943 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 24 Jan 2022 16:28:11 +0800 Subject: [PATCH 01/21] try copy libnsl.so to INSTALL_DIR (#3203) (#3206) --- release-centos7/build/build-tiflash-ci.sh | 3 ++ .../build/build-tiflash-release.sh | 28 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/release-centos7/build/build-tiflash-ci.sh b/release-centos7/build/build-tiflash-ci.sh index 70a59285578..903f0d57e04 100755 --- a/release-centos7/build/build-tiflash-ci.sh +++ b/release-centos7/build/build-tiflash-ci.sh @@ -160,6 +160,9 @@ cp -r "${SRCPATH}/cluster_manage/dist/flash_cluster_manager" "${INSTALL_DIR}"/fl cp -f "$build_dir/dbms/src/Server/tiflash" "${INSTALL_DIR}/tiflash" cp -f "${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.so" "${INSTALL_DIR}/libtiflash_proxy.so" ldd "${INSTALL_DIR}/tiflash" + +ldd "${INSTALL_DIR}/tiflash" | grep 'libnsl.so' | grep '=>' | awk '{print $3}' | xargs -I {} cp {} "${INSTALL_DIR}" + cd "${INSTALL_DIR}" chrpath -d libtiflash_proxy.so "${INSTALL_DIR}/tiflash" ldd "${INSTALL_DIR}/tiflash" diff --git a/release-centos7/build/build-tiflash-release.sh b/release-centos7/build/build-tiflash-release.sh index 7a6a5e9db8f..e1ada2afea3 100755 --- a/release-centos7/build/build-tiflash-release.sh +++ b/release-centos7/build/build-tiflash-release.sh @@ -20,7 +20,7 @@ SRCPATH=$(cd ${SCRIPTPATH}/../..; pwd -P) NPROC=${NPROC:-$(nproc || grep -c ^processor /proc/cpuinfo)} ENABLE_EMBEDDED_COMPILER="FALSE" -install_dir="$SRCPATH/release-centos7/tiflash" +INSTALL_DIR="${SRCPATH}/release-centos7/tiflash" if [ -d "$SRCPATH/contrib/kvproto" ]; then cd "$SRCPATH/contrib/kvproto" @@ -40,10 +40,10 @@ rm -rf ${SRCPATH}/libs/libtiflash-proxy mkdir -p ${SRCPATH}/libs/libtiflash-proxy ln -s ${SRCPATH}/contrib/tiflash-proxy/target/release/libtiflash_proxy.so ${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.so -build_dir="$SRCPATH/release-centos7/build-release" -rm -rf $build_dir && mkdir -p $build_dir && cd $build_dir +BUILD_DIR="${SRCPATH}/release-centos7/build-release" +rm -rf ${BUILD_DIR} && mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} -cmake "$SRCPATH" ${DEFINE_CMAKE_PREFIX_PATH} \ +cmake "${SRCPATH}" ${DEFINE_CMAKE_PREFIX_PATH} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DENABLE_EMBEDDED_COMPILER=${ENABLE_EMBEDDED_COMPILER} \ -DENABLE_ICU=OFF \ @@ -53,15 +53,19 @@ cmake "$SRCPATH" ${DEFINE_CMAKE_PREFIX_PATH} \ -Wno-dev \ -DUSE_CCACHE=OFF -make -j $NPROC tiflash +make -j ${NPROC} tiflash # Reduce binary size by compressing. -objcopy --compress-debug-sections=zlib-gnu "$build_dir/dbms/src/Server/tiflash" +objcopy --compress-debug-sections=zlib-gnu "${BUILD_DIR}/dbms/src/Server/tiflash" -cp -f "$build_dir/dbms/src/Server/tiflash" "$install_dir/tiflash" -cp -f "${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.so" "$install_dir/libtiflash_proxy.so" +cp -f "${BUILD_DIR}/dbms/src/Server/tiflash" "${INSTALL_DIR}/tiflash" +cp -f "${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.so" "${INSTALL_DIR}/libtiflash_proxy.so" -ldd "$install_dir/tiflash" -cd "$install_dir" -chrpath -d libtiflash_proxy.so "$install_dir/tiflash" -ldd "$install_dir/tiflash" +ldd "${INSTALL_DIR}/tiflash" + +ldd "${INSTALL_DIR}/tiflash" | grep 'libnsl.so' | grep '=>' | awk '{print $3}' | xargs -I {} cp {} "${INSTALL_DIR}" + +cd "${INSTALL_DIR}" +chrpath -d libtiflash_proxy.so "${INSTALL_DIR}/tiflash" +ldd "${INSTALL_DIR}/tiflash" +ls -lh "${INSTALL_DIR}" From 8918a8054ce0cbc5202406a2ae38beee795ea2e0 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 24 Jan 2022 20:24:12 +0800 Subject: [PATCH 02/21] FIX exception: GC removed normal file which just created (#3216) (#3227) --- dbms/src/Storages/Page/PageFile.cpp | 2 +- dbms/src/Storages/Page/PageStorage.cpp | 31 +++++++++++++++----------- dbms/src/Storages/Page/PageStorage.h | 1 + 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/dbms/src/Storages/Page/PageFile.cpp b/dbms/src/Storages/Page/PageFile.cpp index 523eccf19c7..1828d7b56cb 100644 --- a/dbms/src/Storages/Page/PageFile.cpp +++ b/dbms/src/Storages/Page/PageFile.cpp @@ -860,6 +860,7 @@ PageFile::recover(const String & parent_path, const FileProviderPtr & file_provi LOG_INFO(log, "Broken page without data file, ignored: " + pf.dataPath()); return {{}, Type::Invalid}; } + return {pf, Type::Formal}; } else if (ss[0] == folder_prefix_checkpoint) @@ -870,7 +871,6 @@ PageFile::recover(const String & parent_path, const FileProviderPtr & file_provi LOG_INFO(log, "Broken page without meta file, ignored: " + pf.metaPath()); return {{}, Type::Invalid}; } - pf.type = Type::Checkpoint; return {pf, Type::Checkpoint}; } diff --git a/dbms/src/Storages/Page/PageStorage.cpp b/dbms/src/Storages/Page/PageStorage.cpp index 284bf538310..beb5599858a 100644 --- a/dbms/src/Storages/Page/PageStorage.cpp +++ b/dbms/src/Storages/Page/PageStorage.cpp @@ -181,20 +181,22 @@ PageFileSet PageStorage::listAllPageFiles(const FileProviderPtr & file_provi if (!option.ignore_checkpoint) page_files.insert(page_file); } - else + else if (page_file_type == PageFile::Type::Temp) { - // For Temp and Invalid if (option.remove_tmp_files) { - if (page_file_type == PageFile::Type::Temp) - { - page_file.deleteEncryptionInfo(); - } - // Remove temp and invalid file. + page_file.deleteEncryptionInfo(); + // Remove temp files. if (Poco::File file(directory + "/" + name); file.exists()) file.remove(true); } } + else + { + // Remove invalid files. + if (Poco::File file(directory + "/" + name); option.remove_invalid_files && file.exists()) + file.remove(true); + } } } @@ -250,8 +252,9 @@ void PageStorage::restore() #ifdef PAGE_STORAGE_UTIL_DEBUGGGING opt.remove_tmp_files = false; #endif - opt.ignore_legacy = false; - opt.ignore_checkpoint = false; + opt.ignore_legacy = false; + opt.ignore_checkpoint = false; + opt.remove_invalid_files = true; PageFileSet page_files = PageStorage::listAllPageFiles(file_provider, delegator, page_file_log, opt); /// Restore current version from both formal and legacy page files @@ -828,9 +831,10 @@ void PageStorage::drop() ListPageFilesOption opt; opt.ignore_checkpoint = false; - opt.ignore_legacy = false; - opt.remove_tmp_files = false; - auto page_files = PageStorage::listAllPageFiles(file_provider, delegator, page_file_log, opt); + opt.ignore_legacy = false; + opt.remove_tmp_files = false; + opt.remove_invalid_files = false; + auto page_files = PageStorage::listAllPageFiles(file_provider, delegator, page_file_log, opt); for (const auto & page_file : page_files) delegator->removePageFile(page_file.fileIdLevel(), page_file.getDiskSize(), false); @@ -962,7 +966,8 @@ bool PageStorage::gc(bool not_skip) } ListPageFilesOption opt; opt.remove_tmp_files = true; - auto page_files = PageStorage::listAllPageFiles(file_provider, delegator, page_file_log, opt); + opt.remove_invalid_files = false; + auto page_files = PageStorage::listAllPageFiles(file_provider, delegator, page_file_log, opt); if (unlikely(page_files.empty())) { // In case the directory are removed by accident diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 274d48bc927..a98c56240e1 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -85,6 +85,7 @@ class PageStorage : private boost::noncopyable bool remove_tmp_files = false; bool ignore_legacy = false; bool ignore_checkpoint = false; + bool remove_invalid_files = false; }; using VersionedPageEntries = PageEntriesVersionSetWithDelta; From 98d4d4308ce7a9ca47639105c32b737b29fabf9d Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 24 Jan 2022 21:04:11 +0800 Subject: [PATCH 03/21] decrease the size of checkpoint file when removing it (#3175) (#3188) --- dbms/src/Storages/Page/PageStorage.cpp | 30 ++- dbms/src/Storages/Page/PageStorage.h | 2 +- dbms/src/Storages/Page/gc/LegacyCompactor.cpp | 5 +- dbms/src/Storages/PathPool.cpp | 191 ++++++++++++------ dbms/src/Storages/PathPool.h | 62 ++++-- dbms/src/Storages/tests/gtest_path_pool.cpp | 18 +- dbms/src/TestUtils/MockDiskDelegator.h | 22 +- 7 files changed, 221 insertions(+), 109 deletions(-) diff --git a/dbms/src/Storages/Page/PageStorage.cpp b/dbms/src/Storages/Page/PageStorage.cpp index beb5599858a..ad783bf7838 100644 --- a/dbms/src/Storages/Page/PageStorage.cpp +++ b/dbms/src/Storages/Page/PageStorage.cpp @@ -353,7 +353,9 @@ void PageStorage::restore() for (auto & pf : page_files_to_remove) LOG_TRACE(log, storage_name << pf.toString()); #else - archivePageFiles(page_files_to_remove); + // when restore `PageStorage`, the `PageFile` in `page_files_to_remove` is not counted in the total size, + // so no need to remove its' size here again. + archivePageFiles(page_files_to_remove, false); #endif removePageFilesIf(page_files, [&page_files_to_remove](const PageFile & pf) -> bool { return page_files_to_remove.count(pf) > 0; }); } @@ -366,8 +368,12 @@ void PageStorage::restore() // Only insert location of PageFile when it storing delta data for (auto & page_file : page_files) { + // Checkpoint file is always stored on `delegator`'s default path, so no need to insert it's location here size_t idx_in_delta_paths = delegator->addPageFileUsedSize( - page_file.fileIdLevel(), page_file.getDiskSize(), page_file.parentPath(), /*need_insert_location*/ true); + page_file.fileIdLevel(), + page_file.getDiskSize(), + page_file.parentPath(), + /*need_insert_location*/ page_file.getType() != PageFile::Type::Checkpoint); // Try best to reuse writable page files if (page_file.reusableForWrite() && isPageFileSizeFitsWritable(page_file, config)) { @@ -837,7 +843,11 @@ void PageStorage::drop() auto page_files = PageStorage::listAllPageFiles(file_provider, delegator, page_file_log, opt); for (const auto & page_file : page_files) - delegator->removePageFile(page_file.fileIdLevel(), page_file.getDiskSize(), false); + { + // All checkpoint file is stored on `delegator`'s default path and we didn't record it's location as other types of PageFile, + // so we need set `remove_from_default_path` to true to distinguish this situation. + delegator->removePageFile(page_file.fileIdLevel(), page_file.getDiskSize(), /*meta_left*/ false, /*remove_from_default_path*/ page_file.getType() == PageFile::Type::Checkpoint); + } /// FIXME: Note that these drop directories actions are not atomic, may leave some broken files on disk. @@ -1132,7 +1142,7 @@ bool PageStorage::gc(bool not_skip) PageFileSet page_files_to_archive; std::tie(page_files, page_files_to_archive, gc_context.num_bytes_written_in_compact_legacy) = compactor.tryCompact(std::move(page_files), writing_files_snapshot); - archivePageFiles(page_files_to_archive); + archivePageFiles(page_files_to_archive, true); gc_context.num_files_archive_in_compact_legacy = page_files_to_archive.size(); } @@ -1178,7 +1188,7 @@ bool PageStorage::gc(bool not_skip) return gc_context.compact_result.do_compaction; } -void PageStorage::archivePageFiles(const PageFileSet & page_files) +void PageStorage::archivePageFiles(const PageFileSet & page_files, bool remove_size) { const Poco::Path archive_path(delegator->defaultPath(), PageStorage::ARCHIVE_SUBDIR); Poco::File archive_dir(archive_path); @@ -1198,11 +1208,13 @@ void PageStorage::archivePageFiles(const PageFileSet & page_files) if (Poco::File file(path); file.exists()) { // To ensure the atomic of deletion, move to the `archive` dir first and then remove the PageFile dir. - auto file_size = page_file.getDiskSize(); + auto file_size = remove_size ? page_file.getDiskSize() : 0; file.moveTo(dest); file.remove(true); page_file.deleteEncryptionInfo(); - delegator->removePageFile(page_file.fileIdLevel(), file_size, false); + // All checkpoint file is stored on `delegator`'s default path and we didn't record it's location as other types of PageFile, + // so we need set `remove_from_default_path` to true to distinguish this situation. + delegator->removePageFile(page_file.fileIdLevel(), file_size, /*meta_left*/ false, /*remove_from_default_path*/ page_file.getType() == PageFile::Type::Checkpoint); } } LOG_INFO(log, storage_name << " archive " + DB::toString(page_files.size()) + " files to " + archive_path.toString()); @@ -1271,7 +1283,9 @@ PageStorage::gcRemoveObsoleteData(PageFileSet & page_file // Don't touch the that are used for the sorting then you could // work around by using a const_cast size_t bytes_removed = const_cast(page_file).setLegacy(); - delegator->removePageFile(page_id_and_lvl, bytes_removed, true); + // All checkpoint file is stored on `delegator`'s default path and we didn't record it's location as other types of PageFile, + // so we need set `remove_from_default_path` to true to distinguish this situation. + delegator->removePageFile(page_id_and_lvl, bytes_removed, /*meta_left*/ true, /*remove_from_default_path*/ page_file.getType() == PageFile::Type::Checkpoint); num_data_removed += 1; } } diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index a98c56240e1..aea59f94419 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -203,7 +203,7 @@ class PageStorage : private boost::noncopyable static constexpr const char * ARCHIVE_SUBDIR = "archive"; - void archivePageFiles(const PageFileSet & page_files_to_archive); + void archivePageFiles(const PageFileSet & page_files_to_archive, bool remove_size); std::tuple // gcRemoveObsoleteData(PageFileSet & page_files, diff --git a/dbms/src/Storages/Page/gc/LegacyCompactor.cpp b/dbms/src/Storages/Page/gc/LegacyCompactor.cpp index d0847044e31..e5960eb09ad 100644 --- a/dbms/src/Storages/Page/gc/LegacyCompactor.cpp +++ b/dbms/src/Storages/Page/gc/LegacyCompactor.cpp @@ -91,7 +91,10 @@ LegacyCompactor::tryCompact( // if (!info.empty()) { bytes_written = writeToCheckpoint(storage_path, checkpoint_id, std::move(wb), file_provider, page_file_log); - // Don't need to insert location since Checkpoint PageFile won't be read except using listAllPageFiles in `PageStorage::restore` + // 1. Don't need to insert location since Checkpoint PageFile won't be read except using listAllPageFiles in `PageStorage::restore` + // 2. Also, `checkpoint_id` is the same as the largest page file compacted, + // so insert the checkpoint file's location here will overwrite the old page file's location and may incur error when deploy on multi disk environment + // 3. And we always store checkpoint file on `delegator`'s default path, so we can just remove it from the default path when removing it delegator->addPageFileUsedSize(checkpoint_id, bytes_written, storage_path, /*need_insert_location=*/false); } diff --git a/dbms/src/Storages/PathPool.cpp b/dbms/src/Storages/PathPool.cpp index 6a84ea40484..d7b3f2e5332 100644 --- a/dbms/src/Storages/PathPool.cpp +++ b/dbms/src/Storages/PathPool.cpp @@ -18,7 +18,6 @@ namespace DB { - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -31,18 +30,26 @@ inline String removeTrailingSlash(String s) return s; } -inline String getNormalizedPath(const String & s) { return removeTrailingSlash(Poco::Path{s}.toString()); } +inline String getNormalizedPath(const String & s) +{ + return removeTrailingSlash(Poco::Path{s}.toString()); +} // Constructor to be used during initialization -PathPool::PathPool(const Strings & main_data_paths_, const Strings & latest_data_paths_, const Strings & kvstore_paths_, // - PathCapacityMetricsPtr global_capacity_, FileProviderPtr file_provider_, bool enable_raft_compatible_mode_) - : main_data_paths(main_data_paths_), - latest_data_paths(latest_data_paths_), - kvstore_paths(kvstore_paths_), - enable_raft_compatible_mode(enable_raft_compatible_mode_), - global_capacity(global_capacity_), - file_provider(file_provider_), - log(&Poco::Logger::get("PathPool")) +PathPool::PathPool( + const Strings & main_data_paths_, + const Strings & latest_data_paths_, + const Strings & kvstore_paths_, // + PathCapacityMetricsPtr global_capacity_, + FileProviderPtr file_provider_, + bool enable_raft_compatible_mode_) + : main_data_paths(main_data_paths_) + , latest_data_paths(latest_data_paths_) + , kvstore_paths(kvstore_paths_) + , enable_raft_compatible_mode(enable_raft_compatible_mode_) + , global_capacity(global_capacity_) + , file_provider(file_provider_) + , log(&Poco::Logger::get("PathPool")) { if (kvstore_paths.empty()) { @@ -74,22 +81,29 @@ Strings PathPool::listPaths() const return paths; } -PSDiskDelegatorPtr PathPool::getPSDiskDelegatorRaft() { return std::make_shared(*this); } +PSDiskDelegatorPtr PathPool::getPSDiskDelegatorRaft() +{ + return std::make_shared(*this); +} //========================================================================================== // StoragePathPool //========================================================================================== -StoragePathPool::StoragePathPool( // - const Strings & main_data_paths, const Strings & latest_data_paths, // - String database_, String table_, bool path_need_database_name_, // - PathCapacityMetricsPtr global_capacity_, FileProviderPtr file_provider_) - : database(std::move(database_)), - table(std::move(table_)), - path_need_database_name(path_need_database_name_), - global_capacity(std::move(global_capacity_)), - file_provider(std::move(file_provider_)), - log(&Poco::Logger::get("StoragePathPool")) +StoragePathPool::StoragePathPool( // + const Strings & main_data_paths, + const Strings & latest_data_paths, // + String database_, + String table_, + bool path_need_database_name_, // + PathCapacityMetricsPtr global_capacity_, + FileProviderPtr file_provider_) + : database(std::move(database_)) + , table(std::move(table_)) + , path_need_database_name(path_need_database_name_) + , global_capacity(std::move(global_capacity_)) + , file_provider(std::move(file_provider_)) + , log(&Poco::Logger::get("StoragePathPool")) { if (unlikely(database.empty() || table.empty())) throw Exception("Can NOT create StoragePathPool [database=" + database + "] [table=" + table + "]", ErrorCodes::LOGICAL_ERROR); @@ -109,15 +123,15 @@ StoragePathPool::StoragePathPool( // } StoragePathPool::StoragePathPool(const StoragePathPool & rhs) - : main_path_infos(rhs.main_path_infos), - latest_path_infos(rhs.latest_path_infos), - dt_file_path_map(rhs.dt_file_path_map), - database(rhs.database), - table(rhs.table), - path_need_database_name(rhs.path_need_database_name), - global_capacity(rhs.global_capacity), - file_provider(rhs.file_provider), - log(rhs.log) + : main_path_infos(rhs.main_path_infos) + , latest_path_infos(rhs.latest_path_infos) + , dt_file_path_map(rhs.dt_file_path_map) + , database(rhs.database) + , table(rhs.table) + , path_need_database_name(rhs.path_need_database_name) + , global_capacity(rhs.global_capacity) + , file_provider(rhs.file_provider) + , log(rhs.log) {} StoragePathPool & StoragePathPool::operator=(const StoragePathPool & rhs) @@ -269,8 +283,7 @@ void StoragePathPool::renamePath(const String & old_path, const String & new_pat //========================================================================================== template -String genericChoosePath(const std::vector & paths, const PathCapacityMetricsPtr & global_capacity, - std::function & paths, size_t idx)> path_generator, Poco::Logger * log, const String & log_msg) +String genericChoosePath(const std::vector & paths, const PathCapacityMetricsPtr & global_capacity, std::function & paths, size_t idx)> path_generator, Poco::Logger * log, const String & log_msg) { if (paths.size() == 1) return path_generator(paths, 0); @@ -399,9 +412,15 @@ void StableDiskDelegator::removeDTFile(UInt64 file_id) // Delta data //========================================================================================== -size_t PSDiskDelegatorMulti::numPaths() const { return pool.latest_path_infos.size(); } +size_t PSDiskDelegatorMulti::numPaths() const +{ + return pool.latest_path_infos.size(); +} -String PSDiskDelegatorMulti::defaultPath() const { return pool.latest_path_infos[0].path + "/" + path_prefix; } +String PSDiskDelegatorMulti::defaultPath() const +{ + return pool.latest_path_infos[default_path_index].path + "/" + path_prefix; +} Strings PSDiskDelegatorMulti::listPaths() const { @@ -417,7 +436,9 @@ Strings PSDiskDelegatorMulti::listPaths() const String PSDiskDelegatorMulti::choosePath(const PageFileIdAndLevel & id_lvl) { std::function path_generator = - [this](const StoragePathPool::LatestPathInfos & paths, size_t idx) -> String { return paths[idx].path + "/" + this->path_prefix; }; + [this](const StoragePathPool::LatestPathInfos & paths, size_t idx) -> String { + return paths[idx].path + "/" + this->path_prefix; + }; { std::lock_guard lock{pool.mutex}; @@ -431,7 +452,10 @@ String PSDiskDelegatorMulti::choosePath(const PageFileIdAndLevel & id_lvl) } size_t PSDiskDelegatorMulti::addPageFileUsedSize( - const PageFileIdAndLevel & id_lvl, size_t size_to_add, const String & pf_parent_path, bool need_insert_location) + const PageFileIdAndLevel & id_lvl, + size_t size_to_add, + const String & pf_parent_path, + bool need_insert_location) { // Get a normalized path without `path_prefix` and trailing '/' String upper_path = removeTrailingSlash(Poco::Path(pf_parent_path).parent().toString()); @@ -467,26 +491,39 @@ String PSDiskDelegatorMulti::getPageFilePath(const PageFileIdAndLevel & id_lvl) throw Exception("Can not find path for PageFile [id=" + toString(id_lvl.first) + "_" + toString(id_lvl.second) + "]"); } -void PSDiskDelegatorMulti::removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left) +void PSDiskDelegatorMulti::removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left, bool remove_from_default_path) { std::lock_guard lock{pool.mutex}; - auto iter = page_path_map.find(id_lvl); - if (unlikely(iter == page_path_map.end())) - return; - auto index = iter->second; - if (!meta_left) - page_path_map.erase(iter); - - pool.global_capacity->freeUsedSize(pool.latest_path_infos[index].path, file_size); + if (remove_from_default_path) + { + pool.global_capacity->freeUsedSize(pool.latest_path_infos[default_path_index].path, file_size); + } + else + { + auto iter = page_path_map.find(id_lvl); + if (unlikely(iter == page_path_map.end())) + return; + auto index = iter->second; + if (!meta_left) + page_path_map.erase(iter); + + pool.global_capacity->freeUsedSize(pool.latest_path_infos[index].path, file_size); + } } //========================================================================================== // Normal data //========================================================================================== -size_t PSDiskDelegatorSingle::numPaths() const { return 1; } +size_t PSDiskDelegatorSingle::numPaths() const +{ + return 1; +} -String PSDiskDelegatorSingle::defaultPath() const { return pool.latest_path_infos[0].path + "/" + path_prefix; } +String PSDiskDelegatorSingle::defaultPath() const +{ + return pool.latest_path_infos[0].path + "/" + path_prefix; +} Strings PSDiskDelegatorSingle::listPaths() const { @@ -502,7 +539,10 @@ String PSDiskDelegatorSingle::choosePath(const PageFileIdAndLevel & /*id_lvl*/) } size_t PSDiskDelegatorSingle::addPageFileUsedSize( - const PageFileIdAndLevel & /*id_lvl*/, size_t size_to_add, const String & pf_parent_path, bool /*need_insert_location*/) + const PageFileIdAndLevel & /*id_lvl*/, + size_t size_to_add, + const String & pf_parent_path, + bool /*need_insert_location*/) { // In this case, inserting to page_path_map seems useless. // Simply add used size for global capacity is OK. @@ -515,7 +555,7 @@ String PSDiskDelegatorSingle::getPageFilePath(const PageFileIdAndLevel & /*id_lv return pool.latest_path_infos[0].path + "/" + path_prefix; } -void PSDiskDelegatorSingle::removePageFile(const PageFileIdAndLevel & /*id_lvl*/, size_t file_size, bool /*meta_left*/) +void PSDiskDelegatorSingle::removePageFile(const PageFileIdAndLevel & /*id_lvl*/, size_t file_size, bool /*meta_left*/, bool /*remove_from_default_path*/) { pool.global_capacity->freeUsedSize(pool.latest_path_infos[0].path, file_size); } @@ -523,7 +563,8 @@ void PSDiskDelegatorSingle::removePageFile(const PageFileIdAndLevel & /*id_lvl*/ //========================================================================================== // Raft data //========================================================================================== -PSDiskDelegatorRaft::PSDiskDelegatorRaft(PathPool & pool_) : pool(pool_) +PSDiskDelegatorRaft::PSDiskDelegatorRaft(PathPool & pool_) + : pool(pool_) { for (const auto & s : pool.kvstore_paths) { @@ -534,16 +575,27 @@ PSDiskDelegatorRaft::PSDiskDelegatorRaft(PathPool & pool_) : pool(pool_) } } -size_t PSDiskDelegatorRaft::numPaths() const { return raft_path_infos.size(); } +size_t PSDiskDelegatorRaft::numPaths() const +{ + return raft_path_infos.size(); +} -String PSDiskDelegatorRaft::defaultPath() const { return raft_path_infos[0].path; } +String PSDiskDelegatorRaft::defaultPath() const +{ + return raft_path_infos[default_path_index].path; +} -Strings PSDiskDelegatorRaft::listPaths() const { return pool.kvstore_paths; } +Strings PSDiskDelegatorRaft::listPaths() const +{ + return pool.kvstore_paths; +} String PSDiskDelegatorRaft::choosePath(const PageFileIdAndLevel & id_lvl) { std::function path_generator - = [](const RaftPathInfos & paths, size_t idx) -> String { return paths[idx].path; }; + = [](const RaftPathInfos & paths, size_t idx) -> String { + return paths[idx].path; + }; { std::lock_guard lock{mutex}; @@ -558,7 +610,10 @@ String PSDiskDelegatorRaft::choosePath(const PageFileIdAndLevel & id_lvl) } size_t PSDiskDelegatorRaft::addPageFileUsedSize( - const PageFileIdAndLevel & id_lvl, size_t size_to_add, const String & pf_parent_path, bool need_insert_location) + const PageFileIdAndLevel & id_lvl, + size_t size_to_add, + const String & pf_parent_path, + bool need_insert_location) { // Get a normalized path without trailing '/' String upper_path = getNormalizedPath(pf_parent_path); @@ -594,17 +649,23 @@ String PSDiskDelegatorRaft::getPageFilePath(const PageFileIdAndLevel & id_lvl) c throw Exception("Can not find path for PageFile [id=" + toString(id_lvl.first) + "_" + toString(id_lvl.second) + "]"); } -void PSDiskDelegatorRaft::removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left) +void PSDiskDelegatorRaft::removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left, bool remove_from_default_path) { std::lock_guard lock{mutex}; - auto iter = page_path_map.find(id_lvl); - if (unlikely(iter == page_path_map.end())) - return; - auto index = iter->second; - if (!meta_left) - page_path_map.erase(iter); - - pool.global_capacity->freeUsedSize(raft_path_infos[index].path, file_size); + if (remove_from_default_path) + { + pool.global_capacity->freeUsedSize(raft_path_infos[default_path_index].path, file_size); + } + else + { + auto iter = page_path_map.find(id_lvl); + if (unlikely(iter == page_path_map.end())) + return; + auto index = iter->second; + if (!meta_left) + page_path_map.erase(iter); + pool.global_capacity->freeUsedSize(raft_path_infos[index].path, file_size); + } } } // namespace DB diff --git a/dbms/src/Storages/PathPool.h b/dbms/src/Storages/PathPool.h index ab26ff48f45..82c464a3eff 100644 --- a/dbms/src/Storages/PathPool.h +++ b/dbms/src/Storages/PathPool.h @@ -12,7 +12,6 @@ class Logger; namespace DB { - class PathCapacityMetrics; using PathCapacityMetricsPtr = std::shared_ptr; class FileProvider; @@ -41,10 +40,12 @@ class PathPool PathPool() = default; // Constructor to be used during initialization - PathPool( // - const Strings & main_data_paths, const Strings & latest_data_paths, // - const Strings & kvstore_paths, // - PathCapacityMetricsPtr global_capacity_, FileProviderPtr file_provider_, // + PathPool( // + const Strings & main_data_paths, + const Strings & latest_data_paths, // + const Strings & kvstore_paths, // + PathCapacityMetricsPtr global_capacity_, + FileProviderPtr file_provider_, // bool enable_raft_compatible_mode_ = false); // Constructor to create PathPool for one Storage @@ -90,7 +91,9 @@ class PathPool class StableDiskDelegator : private boost::noncopyable { public: - StableDiskDelegator(StoragePathPool & pool_) : pool(pool_) {} + StableDiskDelegator(StoragePathPool & pool_) + : pool(pool_) + {} Strings listPaths() const; @@ -122,18 +125,24 @@ class PSDiskDelegator : private boost::noncopyable virtual String choosePath(const PageFileIdAndLevel & id_lvl) = 0; virtual size_t addPageFileUsedSize( - const PageFileIdAndLevel & id_lvl, size_t size_to_add, const String & pf_parent_path, bool need_insert_location) + const PageFileIdAndLevel & id_lvl, + size_t size_to_add, + const String & pf_parent_path, + bool need_insert_location) = 0; virtual String getPageFilePath(const PageFileIdAndLevel & id_lvl) const = 0; - virtual void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left) = 0; + virtual void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left, bool remove_from_default_path) = 0; }; class PSDiskDelegatorMulti : public PSDiskDelegator { public: - PSDiskDelegatorMulti(StoragePathPool & pool_, String prefix) : pool(pool_), path_prefix(std::move(prefix)) {} + PSDiskDelegatorMulti(StoragePathPool & pool_, String prefix) + : pool(pool_) + , path_prefix(std::move(prefix)) + {} size_t numPaths() const override; @@ -144,23 +153,30 @@ class PSDiskDelegatorMulti : public PSDiskDelegator String choosePath(const PageFileIdAndLevel & id_lvl) override; size_t addPageFileUsedSize( - const PageFileIdAndLevel & id_lvl, size_t size_to_add, const String & pf_parent_path, bool need_insert_location) override; + const PageFileIdAndLevel & id_lvl, + size_t size_to_add, + const String & pf_parent_path, + bool need_insert_location) override; String getPageFilePath(const PageFileIdAndLevel & id_lvl) const override; - void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left) override; + void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left, bool remove_from_default_path) override; private: StoragePathPool & pool; const String path_prefix; // PageFileID -> path index PathPool::PageFilePathMap page_path_map; + const UInt32 default_path_index = 0; }; class PSDiskDelegatorSingle : public PSDiskDelegator { public: - PSDiskDelegatorSingle(StoragePathPool & pool_, String prefix) : pool(pool_), path_prefix(std::move(prefix)) {} + PSDiskDelegatorSingle(StoragePathPool & pool_, String prefix) + : pool(pool_) + , path_prefix(std::move(prefix)) + {} size_t numPaths() const override; @@ -171,11 +187,14 @@ class PSDiskDelegatorSingle : public PSDiskDelegator String choosePath(const PageFileIdAndLevel & id_lvl) override; size_t addPageFileUsedSize( - const PageFileIdAndLevel & id_lvl, size_t size_to_add, const String & pf_parent_path, bool need_insert_location) override; + const PageFileIdAndLevel & id_lvl, + size_t size_to_add, + const String & pf_parent_path, + bool need_insert_location) override; String getPageFilePath(const PageFileIdAndLevel & id_lvl) const override; - void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left) override; + void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left, bool remove_from_default_path) override; private: StoragePathPool & pool; @@ -196,11 +215,14 @@ class PSDiskDelegatorRaft : public PSDiskDelegator String choosePath(const PageFileIdAndLevel & id_lvl) override; size_t addPageFileUsedSize( - const PageFileIdAndLevel & id_lvl, size_t size_to_add, const String & pf_parent_path, bool need_insert_location) override; + const PageFileIdAndLevel & id_lvl, + size_t size_to_add, + const String & pf_parent_path, + bool need_insert_location) override; String getPageFilePath(const PageFileIdAndLevel & id_lvl) const override; - void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left) override; + void removePageFile(const PageFileIdAndLevel & id_lvl, size_t file_size, bool meta_left, bool remove_from_default_path) override; private: struct RaftPathInfo @@ -214,6 +236,7 @@ class PSDiskDelegatorRaft : public PSDiskDelegator RaftPathInfos raft_path_infos; // PageFileID -> path index PathPool::PageFilePathMap page_path_map; + const UInt32 default_path_index = 0; }; /// A class to manage paths for the specified storage. @@ -223,8 +246,11 @@ class StoragePathPool static constexpr const char * STABLE_FOLDER_NAME = "stable"; StoragePathPool(const Strings & main_data_paths, const Strings & latest_data_paths, // - String database_, String table_, bool path_need_database_name_, // - PathCapacityMetricsPtr global_capacity_, FileProviderPtr file_provider_); + String database_, + String table_, + bool path_need_database_name_, // + PathCapacityMetricsPtr global_capacity_, + FileProviderPtr file_provider_); StoragePathPool(const StoragePathPool & rhs); StoragePathPool & operator=(const StoragePathPool & rhs); diff --git a/dbms/src/Storages/tests/gtest_path_pool.cpp b/dbms/src/Storages/tests/gtest_path_pool.cpp index 119f1293a09..5cdda3fa916 100644 --- a/dbms/src/Storages/tests/gtest_path_pool.cpp +++ b/dbms/src/Storages/tests/gtest_path_pool.cpp @@ -10,11 +10,12 @@ namespace DB { namespace tests { - class PathPool_test : public ::testing::Test { public: - PathPool_test() : log(&Poco::Logger::get("PathPool_test")) {} + PathPool_test() + : log(&Poco::Logger::get("PathPool_test")) + {} static void SetUpTestCase() {} @@ -103,7 +104,7 @@ try for (size_t i = 0; i < TEST_NUMBER_FOR_CHOOSE; ++i) { PageFileIdAndLevel id{i, 0}; - delegate->removePageFile(id, bytes_written, false); + delegate->removePageFile(id, bytes_written, false, false); } } // PS-single delegate @@ -137,7 +138,7 @@ try for (size_t i = 0; i < TEST_NUMBER_FOR_CHOOSE; ++i) { PageFileIdAndLevel id{i, 0}; - delegate->removePageFile(id, bytes_written, false); + delegate->removePageFile(id, bytes_written, false, false); } } // PS-Raft delegate @@ -171,7 +172,7 @@ try for (size_t i = 0; i < TEST_NUMBER_FOR_CHOOSE; ++i) { PageFileIdAndLevel id{i, 0}; - delegate->removePageFile(id, bytes_written, false); + delegate->removePageFile(id, bytes_written, false, false); } } } @@ -247,7 +248,7 @@ try for (size_t i = 0; i < TEST_NUMBER_FOR_CHOOSE; ++i) { PageFileIdAndLevel id{i, 0}; - delegate->removePageFile(id, bytes_written, false); + delegate->removePageFile(id, bytes_written, false, false); } } // PS-single delegate @@ -281,7 +282,7 @@ try for (size_t i = 0; i < TEST_NUMBER_FOR_CHOOSE; ++i) { PageFileIdAndLevel id{i, 0}; - delegate->removePageFile(id, bytes_written, false); + delegate->removePageFile(id, bytes_written, false, false); } } // PS-Raft delegate @@ -315,11 +316,10 @@ try for (size_t i = 0; i < TEST_NUMBER_FOR_CHOOSE; ++i) { PageFileIdAndLevel id{i, 0}; - delegate->removePageFile(id, bytes_written, false); + delegate->removePageFile(id, bytes_written, false, false); } } } CATCH - } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/MockDiskDelegator.h b/dbms/src/TestUtils/MockDiskDelegator.h index f222a9c2716..885cc5a6b1d 100644 --- a/dbms/src/TestUtils/MockDiskDelegator.h +++ b/dbms/src/TestUtils/MockDiskDelegator.h @@ -5,16 +5,17 @@ namespace DB::tests { - class MockDiskDelegatorSingle final : public PSDiskDelegator { public: - MockDiskDelegatorSingle(String path_) : path(std::move(path_)) {} + MockDiskDelegatorSingle(String path_) + : path(std::move(path_)) + {} size_t numPaths() const { return 1; } String defaultPath() const { return path; } String getPageFilePath(const PageFileIdAndLevel & /*id_lvl*/) const { return path; } - void removePageFile(const PageFileIdAndLevel & /*id_lvl*/, size_t /*file_size*/, bool /*meta_left*/) {} + void removePageFile(const PageFileIdAndLevel & /*id_lvl*/, size_t /*file_size*/, bool /*meta_left*/, bool /*remove_from_default_path*/) {} Strings listPaths() const { Strings paths; @@ -23,7 +24,10 @@ class MockDiskDelegatorSingle final : public PSDiskDelegator } String choosePath(const PageFileIdAndLevel & /*id_lvl*/) { return path; } size_t addPageFileUsedSize( - const PageFileIdAndLevel & /*id_lvl*/, size_t /*size_to_add*/, const String & /*pf_parent_path*/, bool /*need_insert_location*/) + const PageFileIdAndLevel & /*id_lvl*/, + size_t /*size_to_add*/, + const String & /*pf_parent_path*/, + bool /*need_insert_location*/) { return 0; } @@ -35,7 +39,8 @@ class MockDiskDelegatorSingle final : public PSDiskDelegator class MockDiskDelegatorMulti final : public PSDiskDelegator { public: - MockDiskDelegatorMulti(Strings paths_) : paths(std::move(paths_)) + MockDiskDelegatorMulti(Strings paths_) + : paths(std::move(paths_)) { if (paths.empty()) throw Exception("Should not generate MockDiskDelegatorMulti with empty paths"); @@ -44,7 +49,7 @@ class MockDiskDelegatorMulti final : public PSDiskDelegator size_t numPaths() const { return paths.size(); } String defaultPath() const { return paths[0]; } String getPageFilePath(const PageFileIdAndLevel & /*id_lvl*/) const { throw Exception("Not implemented"); } - void removePageFile(const PageFileIdAndLevel & /*id_lvl*/, size_t /*file_size*/, bool /*meta_left*/) {} + void removePageFile(const PageFileIdAndLevel & /*id_lvl*/, size_t /*file_size*/, bool /*meta_left*/, bool /*remove_from_default_path*/) {} Strings listPaths() const { return paths; } String choosePath(const PageFileIdAndLevel & /*id_lvl*/) { @@ -53,7 +58,10 @@ class MockDiskDelegatorMulti final : public PSDiskDelegator return chosen; } size_t addPageFileUsedSize( - const PageFileIdAndLevel & /*id_lvl*/, size_t /*size_to_add*/, const String & /*pf_parent_path*/, bool /*need_insert_location*/) + const PageFileIdAndLevel & /*id_lvl*/, + size_t /*size_to_add*/, + const String & /*pf_parent_path*/, + bool /*need_insert_location*/) { return 0; } From b2d914af10c41c9fe8ee5b2323635cffd786dfa8 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 24 Jan 2022 21:44:11 +0800 Subject: [PATCH 04/21] fix null case for GetJsonLength (#3734) (#3736) --- dbms/src/Storages/Transaction/JSONCodec.cpp | 138 +++++++++++--------- 1 file changed, 75 insertions(+), 63 deletions(-) diff --git a/dbms/src/Storages/Transaction/JSONCodec.cpp b/dbms/src/Storages/Transaction/JSONCodec.cpp index 5c7755f2b5c..382eec98a00 100644 --- a/dbms/src/Storages/Transaction/JSONCodec.cpp +++ b/dbms/src/Storages/Transaction/JSONCodec.cpp @@ -61,7 +61,6 @@ */ namespace DB { - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -69,16 +68,16 @@ extern const int LOGICAL_ERROR; using JsonVar = Poco::Dynamic::Var; -extern const UInt8 TYPE_CODE_OBJECT = 0x01; // TypeCodeObject indicates the JSON is an object. -extern const UInt8 TYPE_CODE_ARRAY = 0x03; // TypeCodeArray indicates the JSON is an array. +extern const UInt8 TYPE_CODE_OBJECT = 0x01; // TypeCodeObject indicates the JSON is an object. +extern const UInt8 TYPE_CODE_ARRAY = 0x03; // TypeCodeArray indicates the JSON is an array. extern const UInt8 TYPE_CODE_LITERAL = 0x04; // TypeCodeLiteral indicates the JSON is a literal. -extern const UInt8 TYPE_CODE_INT64 = 0x09; // TypeCodeInt64 indicates the JSON is a signed integer. -extern const UInt8 TYPE_CODE_UINT64 = 0x0a; // TypeCodeUint64 indicates the JSON is a unsigned integer. +extern const UInt8 TYPE_CODE_INT64 = 0x09; // TypeCodeInt64 indicates the JSON is a signed integer. +extern const UInt8 TYPE_CODE_UINT64 = 0x0a; // TypeCodeUint64 indicates the JSON is a unsigned integer. extern const UInt8 TYPE_CODE_FLOAT64 = 0x0b; // TypeCodeFloat64 indicates the JSON is a double float number. -extern const UInt8 TYPE_CODE_STRING = 0x0c; // TypeCodeString indicates the JSON is a string. -extern const UInt8 LITERAL_NIL = 0x00; // LiteralNil represents JSON null. -extern const UInt8 LITERAL_TRUE = 0x01; // LiteralTrue represents JSON true. -extern const UInt8 LITERAL_FALSE = 0x02; // LiteralFalse represents JSON false. +extern const UInt8 TYPE_CODE_STRING = 0x0c; // TypeCodeString indicates the JSON is a string. +extern const UInt8 LITERAL_NIL = 0x00; // LiteralNil represents JSON null. +extern const UInt8 LITERAL_TRUE = 0x01; // LiteralTrue represents JSON true. +extern const UInt8 LITERAL_FALSE = 0x02; // LiteralFalse represents JSON false. constexpr size_t VALUE_ENTRY_SIZE = 5; constexpr size_t KEY_ENTRY_LENGTH = 6; @@ -161,22 +160,22 @@ JsonVar decodeValue(UInt8 type, size_t & cursor, const String & raw_value) { switch (type) // JSON Root element type { - case TYPE_CODE_OBJECT: - return decodeObject(cursor, raw_value); - case TYPE_CODE_ARRAY: - return decodeArray(cursor, raw_value); - case TYPE_CODE_LITERAL: - return decodeLiteral(cursor, raw_value); - case TYPE_CODE_INT64: - return JsonVar(decodeNumeric(cursor, raw_value)); - case TYPE_CODE_UINT64: - return JsonVar(decodeNumeric(cursor, raw_value)); - case TYPE_CODE_FLOAT64: - return JsonVar(decodeNumeric(cursor, raw_value)); - case TYPE_CODE_STRING: - return JsonVar(decodeString(cursor, raw_value)); - default: - throw Exception("decodeValue: Unknown JSON Element Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR); + case TYPE_CODE_OBJECT: + return decodeObject(cursor, raw_value); + case TYPE_CODE_ARRAY: + return decodeArray(cursor, raw_value); + case TYPE_CODE_LITERAL: + return decodeLiteral(cursor, raw_value); + case TYPE_CODE_INT64: + return JsonVar(decodeNumeric(cursor, raw_value)); + case TYPE_CODE_UINT64: + return JsonVar(decodeNumeric(cursor, raw_value)); + case TYPE_CODE_FLOAT64: + return JsonVar(decodeNumeric(cursor, raw_value)); + case TYPE_CODE_STRING: + return JsonVar(decodeString(cursor, raw_value)); + default: + throw Exception("decodeValue: Unknown JSON Element Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR); } } @@ -185,18 +184,21 @@ inline JsonVar decodeLiteral(size_t & cursor, const String & raw_value) UInt8 type = raw_value[cursor++]; switch (type) { - case LITERAL_FALSE: - return JsonVar(false); - case LITERAL_NIL: - return JsonVar(); - case LITERAL_TRUE: - return JsonVar(true); - default: - throw Exception("decodeLiteral: Unknown JSON Literal Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR); + case LITERAL_FALSE: + return JsonVar(false); + case LITERAL_NIL: + return JsonVar(); + case LITERAL_TRUE: + return JsonVar(true); + default: + throw Exception("decodeLiteral: Unknown JSON Literal Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR); } } -inline String decodeString(size_t base, const String & raw_value, size_t length) { return String(raw_value, base, length); } +inline String decodeString(size_t base, const String & raw_value, size_t length) +{ + return String(raw_value, base, length); +} inline String decodeString(size_t & cursor, const String & raw_value) { @@ -239,28 +241,28 @@ typename need_decode::type DecodeJson(size_t & cursor, const String & switch (type) // JSON Root element type { - case TYPE_CODE_OBJECT: - cursor += 4; - size = decodeNumeric(cursor, raw_value); - break; - case TYPE_CODE_ARRAY: - cursor += 4; - size = decodeNumeric(cursor, raw_value); - break; - case TYPE_CODE_LITERAL: - size = 1; - break; - case TYPE_CODE_INT64: - case TYPE_CODE_UINT64: - case TYPE_CODE_FLOAT64: - size = 8; - break; - case TYPE_CODE_STRING: - size = DecodeVarUInt(cursor, raw_value); - size += (cursor - base - 1); - break; - default: - throw Exception("DecodeJsonBinary: Unknown JSON Element Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR); + case TYPE_CODE_OBJECT: + cursor += 4; + size = decodeNumeric(cursor, raw_value); + break; + case TYPE_CODE_ARRAY: + cursor += 4; + size = decodeNumeric(cursor, raw_value); + break; + case TYPE_CODE_LITERAL: + size = 1; + break; + case TYPE_CODE_INT64: + case TYPE_CODE_UINT64: + case TYPE_CODE_FLOAT64: + size = 8; + break; + case TYPE_CODE_STRING: + size = DecodeVarUInt(cursor, raw_value); + size += (cursor - base - 1); + break; + default: + throw Exception("DecodeJsonBinary: Unknown JSON Element Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR); } size++; @@ -271,19 +273,29 @@ typename need_decode::type DecodeJson(size_t & cursor, const String & return static_cast::type>(raw_value.substr(base, size)); } -void SkipJson(size_t & cursor, const String & raw_value) { DecodeJson(cursor, raw_value); } +void SkipJson(size_t & cursor, const String & raw_value) +{ + DecodeJson(cursor, raw_value); +} -String DecodeJsonAsBinary(size_t & cursor, const String & raw_value) { return DecodeJson(cursor, raw_value); } +String DecodeJsonAsBinary(size_t & cursor, const String & raw_value) +{ + return DecodeJson(cursor, raw_value); +} UInt64 GetJsonLength(std::string_view raw_value) { + if (raw_value.empty()) + { + return 0; + } switch (raw_value[0]) // JSON Root element type { - case TYPE_CODE_OBJECT: - case TYPE_CODE_ARRAY: - return *(reinterpret_cast(&raw_value[1])); - default: - return 1; + case TYPE_CODE_OBJECT: + case TYPE_CODE_ARRAY: + return *(reinterpret_cast(&raw_value[1])); + default: + return 1; } } From 8f71be7b35168a2eb8bf33589fcc7f3452d2cdc5 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 24 Jan 2022 22:40:12 +0800 Subject: [PATCH 05/21] Fix sync schema exception. (#2670) (#2673) --- dbms/src/Common/TiFlashException.h | 3 +++ dbms/src/Storages/Transaction/SchemaBuilder.cpp | 16 ++++++++-------- dbms/src/Storages/Transaction/TiDBSchemaSyncer.h | 9 +++++++++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/dbms/src/Common/TiFlashException.h b/dbms/src/Common/TiFlashException.h index 0508889ae64..01923903f58 100644 --- a/dbms/src/Common/TiFlashException.h +++ b/dbms/src/Common/TiFlashException.h @@ -55,6 +55,9 @@ namespace DB E(Internal, "TiFlash DDL internal error.", \ "Please contact with developer, \n" \ "better providing information about your cluster(log, topology information etc.).", \ + ""); \ + E(StaleSchema, "Schema is stale and need to reload all schema.", \ + "This error will be recover by reload all schema automatically.", \ "");) \ C(Coprocessor, \ E(BadRequest, "Bad TiDB coprocessor request.", \ diff --git a/dbms/src/Storages/Transaction/SchemaBuilder.cpp b/dbms/src/Storages/Transaction/SchemaBuilder.cpp index 984aede5296..e63b1e58e81 100644 --- a/dbms/src/Storages/Transaction/SchemaBuilder.cpp +++ b/dbms/src/Storages/Transaction/SchemaBuilder.cpp @@ -353,7 +353,7 @@ void SchemaBuilder::applyAlterTable(DBInfoPtr db_info, Table auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) { - throw TiFlashException("miss table in TiKV : " + std::to_string(table_id), Errors::DDL::MissingTable); + throw TiFlashException("miss table in TiKV : " + std::to_string(table_id), Errors::DDL::StaleSchema); } auto & tmt_context = context.getTMTContext(); auto storage = tmt_context.getStorages().get(table_info->id); @@ -409,7 +409,7 @@ void SchemaBuilder::applyDiff(const SchemaDiff & diff) auto db_info = getter.getDatabase(diff.schema_id); if (db_info == nullptr) - throw TiFlashException("miss database: " + std::to_string(diff.schema_id), Errors::DDL::MissingTable); + throw TiFlashException("miss database: " + std::to_string(diff.schema_id), Errors::DDL::StaleSchema); TableID old_table_id = 0, new_table_id = 0; @@ -491,7 +491,7 @@ void SchemaBuilder::applyPartitionDiff(TiDB::DBInfoPtr db_in auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) { - throw TiFlashException("miss old table id in TiKV " + std::to_string(table_id), Errors::DDL::MissingTable); + throw TiFlashException("miss old table id in TiKV " + std::to_string(table_id), Errors::DDL::StaleSchema); } if (!table_info->isLogicalPartitionTable()) { @@ -672,17 +672,17 @@ void SchemaBuilder::applyExchangeTablePartition(const Schema throw Exception("Incorrect schema diff, no affected_opts for alter table exchange partition schema diff", ErrorCodes::DDL_ERROR); auto npt_db_info = getter.getDatabase(diff.schema_id); if (npt_db_info == nullptr) - throw TiFlashException("miss database: " + std::to_string(diff.schema_id), Errors::DDL::MissingTable); + throw TiFlashException("miss database: " + std::to_string(diff.schema_id), Errors::DDL::StaleSchema); auto pt_db_info = getter.getDatabase(diff.affected_opts[0].schema_id); if (pt_db_info == nullptr) - throw TiFlashException("miss database: " + std::to_string(diff.affected_opts[0].schema_id), Errors::DDL::MissingTable); + throw TiFlashException("miss database: " + std::to_string(diff.affected_opts[0].schema_id), Errors::DDL::StaleSchema); auto npt_table_id = diff.old_table_id; auto pt_partition_id = diff.table_id; auto pt_table_info = diff.affected_opts[0].table_id; /// step 1 change the mete data of partition table auto table_info = getter.getTableInfo(pt_db_info->id, pt_table_info); if (table_info == nullptr) - throw TiFlashException("miss table in TiKV : " + std::to_string(pt_table_info), Errors::DDL::MissingTable); + throw TiFlashException("miss table in TiKV : " + std::to_string(pt_table_info), Errors::DDL::StaleSchema); auto & tmt_context = context.getTMTContext(); auto storage = tmt_context.getStorages().get(table_info->id); if (storage == nullptr) @@ -722,7 +722,7 @@ void SchemaBuilder::applyExchangeTablePartition(const Schema /// step 3 change partition of the partition table to non partition table table_info = getter.getTableInfo(npt_db_info->id, pt_partition_id); if (table_info == nullptr) - throw TiFlashException("miss table in TiKV : " + std::to_string(pt_partition_id), Errors::DDL::MissingTable); + throw TiFlashException("miss table in TiKV : " + std::to_string(pt_partition_id), Errors::DDL::StaleSchema); storage = tmt_context.getStorages().get(table_info->id); if (storage == nullptr) throw TiFlashException( @@ -1087,7 +1087,7 @@ void SchemaBuilder::applySetTiFlashReplica(TiDB::DBInfoPtr d auto latest_table_info = getter.getTableInfo(db_info->id, table_id); if (unlikely(latest_table_info == nullptr)) { - throw TiFlashException("miss table in TiKV : " + DB::toString(table_id), Errors::DDL::MissingTable); + throw TiFlashException("miss table in TiKV : " + DB::toString(table_id), Errors::DDL::StaleSchema); } auto & tmt_context = context.getTMTContext(); auto storage = tmt_context.getStorages().get(latest_table_info->id); diff --git a/dbms/src/Storages/Transaction/TiDBSchemaSyncer.h b/dbms/src/Storages/Transaction/TiDBSchemaSyncer.h index 1b2c7950b12..ef9d264d2e1 100644 --- a/dbms/src/Storages/Transaction/TiDBSchemaSyncer.h +++ b/dbms/src/Storages/Transaction/TiDBSchemaSyncer.h @@ -153,6 +153,15 @@ struct TiDBSchemaSyncer : public SchemaSyncer builder.applyDiff(diff); } } + catch (TiFlashException & e) + { + if (!e.getError().is(Errors::DDL::StaleSchema)) + { + GET_METRIC(context.getTiFlashMetrics(), tiflash_schema_apply_count, type_failed).Increment(); + } + LOG_WARNING(log, "apply diff meets exception : " << e.displayText() << " \n stack is " << e.getStackTrace().toString()); + return false; + } catch (Exception & e) { GET_METRIC(context.getTiFlashMetrics(), tiflash_schema_apply_count, type_failed).Increment(); From 1b54e55201e32e4599e1367609a8e01deb71a574 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 24 Jan 2022 23:35:29 +0800 Subject: [PATCH 06/21] fix main_capacity_quota_ check (#3349) (#3426) * This is an automated cherry-pick of #3349 Signed-off-by: ti-chi-bot * fix ut conflict * Update gtest_path_pool.cpp Co-authored-by: hehechen Co-authored-by: JaySon --- dbms/src/Storages/PathCapacityMetrics.cpp | 33 +++++++++++--------- dbms/src/Storages/tests/gtest_path_pool.cpp | 34 +++++++++++++++++++++ 2 files changed, 53 insertions(+), 14 deletions(-) diff --git a/dbms/src/Storages/PathCapacityMetrics.cpp b/dbms/src/Storages/PathCapacityMetrics.cpp index 47aa2c8c6c6..ec9dc667f96 100644 --- a/dbms/src/Storages/PathCapacityMetrics.cpp +++ b/dbms/src/Storages/PathCapacityMetrics.cpp @@ -21,16 +21,21 @@ extern const Metric StoreSizeUsed; namespace DB { +inline size_t safeGetQuota(const std::vector & quotas, size_t idx) +{ + return idx < quotas.size() ? quotas[idx] : 0; +} -inline size_t safeGetQuota(const std::vector & quotas, size_t idx) { return idx < quotas.size() ? quotas[idx] : 0; } - -PathCapacityMetrics::PathCapacityMetrics( // - const size_t capacity_quota_, // will be ignored if `main_capacity_quota` is not empty - const Strings & main_paths_, const std::vector main_capacity_quota_, // - const Strings & latest_paths_, const std::vector latest_capacity_quota_) - : capacity_quota(capacity_quota_), log(&Poco::Logger::get("PathCapacityMetrics")) +PathCapacityMetrics::PathCapacityMetrics( + const size_t capacity_quota_, // will be ignored if `main_capacity_quota` is not empty + const Strings & main_paths_, + const std::vector main_capacity_quota_, + const Strings & latest_paths_, + const std::vector latest_capacity_quota_) + : capacity_quota(capacity_quota_) + , log(&Poco::Logger::get("PathCapacityMetrics")) { - if (main_capacity_quota_.empty()) + if (!main_capacity_quota_.empty()) { // The `capacity_quota_` is left for backward compatibility. // If `main_capacity_quota_` is not empty, use the capacity for each path instead of global capacity. @@ -130,10 +135,10 @@ FsStats PathCapacityMetrics::getFsStats() const // Default threshold "schedule.low-space-ratio" in PD is 0.8, log warning message if avail ratio is low. if (avail_rate <= 0.2) LOG_WARNING(log, - "Available space is only " << DB::toString(avail_rate * 100.0, 2) - << "% of capacity size. Avail size: " << formatReadableSizeWithBinarySuffix(total_stat.avail_size) - << ", used size: " << formatReadableSizeWithBinarySuffix(total_stat.used_size) - << ", capacity size: " << formatReadableSizeWithBinarySuffix(total_stat.capacity_size)); + "Available space is only " << DB::toString(avail_rate * 100.0, 2) + << "% of capacity size. Avail size: " << formatReadableSizeWithBinarySuffix(total_stat.avail_size) + << ", used size: " << formatReadableSizeWithBinarySuffix(total_stat.used_size) + << ", capacity size: " << formatReadableSizeWithBinarySuffix(total_stat.capacity_size)); total_stat.ok = 1; CurrentMetrics::set(CurrentMetrics::StoreSizeCapacity, total_stat.capacity_size); @@ -219,8 +224,8 @@ FsStats PathCapacityMetrics::CapacityInfo::getStats(Poco::Logger * log) const avail = capacity - res.used_size; else if (log) LOG_WARNING(log, - "No available space for path: " << path << ", capacity: " << formatReadableSizeWithBinarySuffix(capacity) // - << ", used: " << formatReadableSizeWithBinarySuffix(used_bytes)); + "No available space for path: " << path << ", capacity: " << formatReadableSizeWithBinarySuffix(capacity) // + << ", used: " << formatReadableSizeWithBinarySuffix(used_bytes)); const uint64_t disk_free_bytes = vfs.f_bavail * vfs.f_frsize; if (avail > disk_free_bytes) diff --git a/dbms/src/Storages/tests/gtest_path_pool.cpp b/dbms/src/Storages/tests/gtest_path_pool.cpp index 5cdda3fa916..24c826881ae 100644 --- a/dbms/src/Storages/tests/gtest_path_pool.cpp +++ b/dbms/src/Storages/tests/gtest_path_pool.cpp @@ -321,5 +321,39 @@ try } } CATCH + +static void createIfNotExist(const String & path) +{ + if (Poco::File file(path); !file.exists()) + file.createDirectories(); +} + +TEST(PathCapcatity, FsStats) +try +{ + std::string main_data_path = TiFlashTestEnv::getTemporaryPath() + "/main"; + createIfNotExist(main_data_path); + + std::string latest_data_path = TiFlashTestEnv::getTemporaryPath() + "/lastest"; + createIfNotExist(latest_data_path); + + size_t global_capacity_quota = 10; + size_t capacity = 100; + { + PathCapacityMetrics path_capacity(global_capacity_quota, {main_data_path}, {capacity}, {latest_data_path}, {capacity}); + + FsStats fs_stats = path_capacity.getFsStats(); + EXPECT_EQ(fs_stats.capacity_size, 2 * capacity); // summing the capacity of main and latest path + } + + { + PathCapacityMetrics path_capacity(global_capacity_quota, {main_data_path}, {}, {latest_data_path}, {}); + + FsStats fs_stats = path_capacity.getFsStats(); + EXPECT_EQ(fs_stats.capacity_size, global_capacity_quota); // Use `global_capacity_quota` when `main_capacity_quota_` is empty + } +} +CATCH + } // namespace tests } // namespace DB From c52a7e2d31ea44e534d607545686fd3836e15efb Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 25 Jan 2022 00:02:12 +0800 Subject: [PATCH 07/21] Fix potential data inconsistency when widen pk column type if pk is handle (#3568) (#3573) --- .../Transaction/RegionBlockReader.cpp | 20 ++++++++++++- tests/fullstack-test2/ddl/widen_pk.test | 28 +++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 tests/fullstack-test2/ddl/widen_pk.test diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index 1f663c4a999..fc407fc7dc3 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -352,7 +352,25 @@ bool setColumnValues(ColumnUInt8 & delmark_col, } } else - column_map.getMutableColumnPtr(pk_column_ids[0])->insert(Field(static_cast(pk))); + { + // The pk_type must be Int32/Uint32 or more narrow type + // so cannot tell its' exact type here, just use `insert(Field)` + HandleID handle_value(static_cast(pk)); + auto & pk_column = column_map.getMutableColumnPtr(pk_column_ids[0]); + pk_column->insert(Field(handle_value)); + if (unlikely(pk_column->getInt(index) != handle_value)) + { + if (!force_decode) + { + return false; + } + else + { + throw Exception("Detected overflow value when decoding pk column of type " + pk_column->getName(), + ErrorCodes::LOGICAL_ERROR); + } + } + } index++; } decoded_data.checkValid(); diff --git a/tests/fullstack-test2/ddl/widen_pk.test b/tests/fullstack-test2/ddl/widen_pk.test new file mode 100644 index 00000000000..2fdb5ec98ba --- /dev/null +++ b/tests/fullstack-test2/ddl/widen_pk.test @@ -0,0 +1,28 @@ +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key) +mysql> alter table test.t set tiflash replica 1 + +func> wait_table test t + +mysql> insert into test.t values(1); + +mysql> select /*+ read_from_storage(tiflash[t]) */ * from test.t; ++---+ +| a | ++---+ +| 1 | ++---+ + +>> DBGInvoke __enable_schema_sync_service('false') + +mysql> alter table test.t modify column a bigint; + +mysql> insert into test.t values(9223372036854775807); + +mysql> select /*+ read_from_storage(tiflash[t]) */ * from test.t; ++---------------------+ +| a | ++---------------------+ +| 1 | +| 9223372036854775807 | ++---------------------+ From 714fc7d18978c805b579fd588de43a9c2894e08d Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 25 Jan 2022 15:12:11 +0800 Subject: [PATCH 08/21] Fix ~DeltaTree try to delete uninitialized members (#3903) (#3916) --- dbms/src/Storages/DeltaMerge/DeltaTree.h | 37 ++++++++++++++---------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.h b/dbms/src/Storages/DeltaMerge/DeltaTree.h index aebadf8ae44..c2482c4ab13 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaTree.h +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.h @@ -480,7 +480,7 @@ class DTEntryIterator { using LeafPtr = DTLeaf *; - LeafPtr leaf; + LeafPtr leaf = nullptr; size_t pos; Int64 delta; @@ -553,8 +553,8 @@ class DTEntriesCopy : Allocator const size_t entry_count; const Int64 delta; - UInt64 * const sids; - DTMutation * const mutations; + UInt64 * const sids = nullptr; + DTMutation * const mutations = nullptr; public: DTEntriesCopy(LeafPtr left_leaf, size_t entry_count_, Int64 delta_) @@ -578,8 +578,10 @@ class DTEntriesCopy : Allocator ~DTEntriesCopy() { - this->free(sids, sizeof(UInt64) * entry_count); - this->free(mutations, sizeof(DTMutation) * entry_count); + if (sids) + this->free(sids, sizeof(UInt64) * entry_count); + if (mutations) + this->free(mutations, sizeof(DTMutation) * entry_count); } class Iterator @@ -728,18 +730,19 @@ class DeltaTree static_assert(std::is_standard_layout_v); private: - NodePtr root; - LeafPtr left_leaf, right_leaf; + NodePtr root = nullptr; + LeafPtr left_leaf = nullptr; + LeafPtr right_leaf = nullptr; size_t height = 1; size_t num_inserts = 0; size_t num_deletes = 0; size_t num_entries = 0; - Allocator * allocator; + Allocator * allocator = nullptr; size_t bytes = 0; - Logger * log; + Logger * log = nullptr; public: // For test cases only. @@ -889,12 +892,16 @@ class DeltaTree ~DeltaTree() { - if (isLeaf(root)) - freeTree((LeafPtr)root); - else - freeTree((InternPtr)root); + if (root) + { + if (isLeaf(root)) + freeTree((LeafPtr)root); + else + freeTree((InternPtr)root); + } - delete allocator; + if (allocator) + delete allocator; LOG_TRACE(log, "free"); } @@ -1480,4 +1487,4 @@ typename DT_CLASS::InternPtr DT_CLASS::afterNodeUpdated(T * node) #undef DT_CLASS } // namespace DM -} // namespace DB \ No newline at end of file +} // namespace DB From e182581ced5097044d73d0d3273b839eeae6e6de Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 11:44:44 +0800 Subject: [PATCH 09/21] avoid tiflash crash when query is killed (#3434) (#3449) --- .../ParallelAggregatingBlockInputStream.cpp | 4 +++- dbms/src/DataStreams/UnionBlockInputStream.h | 3 ++- dbms/src/Flash/Mpp/MPPHandler.h | 11 +++++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index 51f242fa2ab..1f7daa7ef2e 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -156,7 +156,9 @@ void ParallelAggregatingBlockInputStream::Handler::onFinish() void ParallelAggregatingBlockInputStream::Handler::onException(std::exception_ptr & exception, size_t thread_num) { parent.exceptions[thread_num] = exception; - parent.cancel(false); + /// can not cancel parent inputStream or the exception might be lost + if (!parent.executed) + parent.processor.cancel(false); } diff --git a/dbms/src/DataStreams/UnionBlockInputStream.h b/dbms/src/DataStreams/UnionBlockInputStream.h index 71e72672aa3..520eb144142 100644 --- a/dbms/src/DataStreams/UnionBlockInputStream.h +++ b/dbms/src/DataStreams/UnionBlockInputStream.h @@ -284,7 +284,8 @@ class UnionBlockInputStream final : public IProfilingBlockInputStream /// and the exception is lost. parent.output_queue.push(exception); - parent.cancel(false); /// Does not throw exceptions. + /// can not cancel parent inputStream or the exception might be lost + parent.processor.cancel(false); /// Does not throw exceptions. } Self & parent; diff --git a/dbms/src/Flash/Mpp/MPPHandler.h b/dbms/src/Flash/Mpp/MPPHandler.h index 0ac8aa910a5..c8b125cac64 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.h +++ b/dbms/src/Flash/Mpp/MPPHandler.h @@ -236,14 +236,16 @@ enum TaskStatus struct MPPTask : std::enable_shared_from_this, private boost::noncopyable { - Context context; std::unique_ptr dag_req; - std::unique_ptr dag_context; + Context context; /// store io in MPPTask to keep the life cycle of memory_tracker for the current query - /// BlockIO contains some information stored in Context and DAGContext, so need deconstruct it before Context and DAGContext + /// BlockIO contains some information stored in Context, so need deconstruct it before Context BlockIO io; + /// The inputStreams should be released in the destructor of BlockIO, since DAGContext contains + /// some reference to inputStreams, so it need to be destructed before BlockIO + std::unique_ptr dag_context; MemoryTracker * memory_tracker = nullptr; MPPTaskId id; @@ -351,7 +353,8 @@ struct MPPTask : std::enable_shared_from_this, private boost::noncopyab { /// MPPTask maybe destructed by different thread, set the query memory_tracker /// to current_memory_tracker in the destructor - current_memory_tracker = memory_tracker; + if (current_memory_tracker != memory_tracker) + current_memory_tracker = memory_tracker; closeAllTunnel(""); LOG_DEBUG(log, "finish MPPTask: " << id.toString()); } From 31dab714286f557734393dec1db8f5b22bc25d6c Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 12:26:44 +0800 Subject: [PATCH 10/21] Fix ha test random failure (#3649) (#3707) --- contrib/client-c | 2 +- dbms/src/Flash/Mpp/ExchangeReceiver.cpp | 6 ++++-- dbms/src/Server/Server.cpp | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/contrib/client-c b/contrib/client-c index 1325209da01..10482e4fbb9 160000 --- a/contrib/client-c +++ b/contrib/client-c @@ -1 +1 @@ -Subproject commit 1325209da019f5d6e2a17b18a3b4e1f4a49f0852 +Subproject commit 10482e4fbb92727fea14a9691bb7d29d408f25ca diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index ff3408f42bb..ea23365cff3 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -51,7 +51,8 @@ void ExchangeReceiver::ReadLoop(const String & meta_raw, size_t source_index) req->set_allocated_sender_meta(sender_task); LOG_DEBUG(log, "begin start and read : " << req->DebugString()); ::grpc::Status status = ::grpc::Status::OK; - for (int i = 0; i < 10; i++) + static const Int32 MAX_RETRY_TIMES = 10; + for (int i = 0; i < MAX_RETRY_TIMES; i++) { pingcap::kv::RpcCall call(req); grpc::ClientContext client_context; @@ -93,8 +94,9 @@ void ExchangeReceiver::ReadLoop(const String & meta_raw, size_t source_index) } else { + bool retriable = !has_data && i + 1 < MAX_RETRY_TIMES; LOG_WARNING(log, - "EstablishMPPConnectionRequest meets rpc fail. Err msg is: " << status.error_message() << " req info " << req_info); + "EstablishMPPConnectionRequest meets rpc fail for req " << req_info << ". Err code = " << status.error_code() << ", err msg = " << status.error_message() << ", retriable = " << retriable); // if we have received some data, we should not retry. if (has_data) break; diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index fbe1221ab7d..3d185756105 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -907,8 +907,9 @@ int Server::main(const std::vector & /*args*/) /// Init and register flash service. flash_service = std::make_unique(*this); diagnostics_service = std::make_unique(*this); - builder.SetOption(grpc::MakeChannelArgumentOption("grpc.http2.min_ping_interval_without_data_ms", 10 * 1000)); - builder.SetOption(grpc::MakeChannelArgumentOption("grpc.http2.min_time_between_pings_ms", 10 * 1000)); + builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS, 5 * 1000)); + builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_SENT_PING_INTERVAL_WITHOUT_DATA_MS, 10 * 1000)); + builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1)); builder.RegisterService(flash_service.get()); LOG_INFO(log, "Flash service registered"); builder.RegisterService(diagnostics_service.get()); From c9a65e5cb255f97800229cbf1bddce834480bf1f Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 13:06:44 +0800 Subject: [PATCH 11/21] Add retry when decode dag request failed (#3334) (#3677) --- dbms/src/Flash/BatchCoprocessorHandler.cpp | 2 +- dbms/src/Flash/Coprocessor/DAGContext.h | 19 ++++------- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 20 +++++++++++ dbms/src/Flash/Coprocessor/DAGUtils.h | 1 + dbms/src/Flash/CoprocessorHandler.cpp | 2 +- dbms/src/Flash/Mpp/MPPHandler.cpp | 33 +++++++++---------- .../expr/expr_tree_too_deep.test | 23 +++++++++++++ 7 files changed, 67 insertions(+), 33 deletions(-) create mode 100644 tests/fullstack-test/expr/expr_tree_too_deep.test diff --git a/dbms/src/Flash/BatchCoprocessorHandler.cpp b/dbms/src/Flash/BatchCoprocessorHandler.cpp index 61ef2a89a6a..e2dec60f234 100644 --- a/dbms/src/Flash/BatchCoprocessorHandler.cpp +++ b/dbms/src/Flash/BatchCoprocessorHandler.cpp @@ -44,7 +44,7 @@ grpc::Status BatchCoprocessorHandler::execute() const auto dag_request = ({ tipb::DAGRequest dag_req; - dag_req.ParseFromString(cop_request->data()); + getDAGRequestFromStringWithRetry(dag_req, cop_request->data()); std::move(dag_req); }); RegionInfoMap regions; diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index b19d28a28a0..8f86f97d516 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -34,28 +34,21 @@ class DAGContext flags(dag_request.flags()), sql_mode(dag_request.sql_mode()), _warnings(std::numeric_limits::max()){}; - explicit DAGContext(const tipb::DAGRequest & dag_request, const mpp::TaskMeta & meta_) + explicit DAGContext(const tipb::DAGRequest & dag_request, const mpp::TaskMeta & meta_, bool is_root_mpp_task_) : collect_execution_summaries(dag_request.has_collect_execution_summaries() && dag_request.collect_execution_summaries()), return_executor_id(true), is_mpp_task(true), + is_root_mpp_task(is_root_mpp_task_), flags(dag_request.flags()), sql_mode(dag_request.sql_mode()), mpp_task_meta(meta_), _warnings(std::numeric_limits::max()) { + assert(dag_request.has_root_executor()); exchange_sender_executor_id = dag_request.root_executor().executor_id(); - const auto & exchangeSender = dag_request.root_executor().exchange_sender(); - exchange_sender_execution_summary_key = exchangeSender.child().executor_id(); - is_root_mpp_task = false; - if (exchangeSender.encoded_task_meta_size() == 1) - { - /// root mpp task always has 1 task_meta because there is only one TiDB - /// node for each mpp query - mpp::TaskMeta task_meta; - task_meta.ParseFromString(exchangeSender.encoded_task_meta(0)); - is_root_mpp_task = task_meta.task_id() == -1; - } - }; + exchange_sender_execution_summary_key = dag_request.root_executor().exchange_sender().child().executor_id(); + } + std::map & getProfileStreamsMap(); std::unordered_map & getProfileStreamsMapForJoinBuildSide(); std::unordered_map> & getQBIdToJoinAliasMap(); diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 30ee59984b4..698827dcf5c 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -483,6 +483,26 @@ void assertBlockSchema(const DataTypes & expected_types, const Block & block, co } } +void getDAGRequestFromStringWithRetry(tipb::DAGRequest & dag_req, const String & s) +{ + if (!dag_req.ParseFromString(s)) + { + /// ParseFromString will use the default recursion limit, which is 100 to decode the plan, if the plan tree is too deep, + /// it may exceed this limit, so just try again by double the recursion limit + ::google::protobuf::io::CodedInputStream coded_input_stream(reinterpret_cast(s.data()), s.size()); + coded_input_stream.SetRecursionLimit(::google::protobuf::io::CodedInputStream::GetDefaultRecursionLimit() * 2); + if (!dag_req.ParseFromCodedStream(&coded_input_stream)) + { + /// just return error if decode failed this time, because it's really a corner case, and even if we can decode the plan + /// successfully by using a very large value of the recursion limit, it is kinds of meaningless because the runtime + /// performance of this task may be very bad if the plan tree is too deep + throw TiFlashException( + std::string(__PRETTY_FUNCTION__) + ": Invalid encoded plan, the most likely is that the plan/expression tree is too deep", + Errors::Coprocessor::BadRequest); + } + } +} + extern const String UniqRawResName; std::unordered_map agg_func_map({ diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 0704e003d21..44f80ea9287 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -67,5 +67,6 @@ class UniqueNameGenerator return ret_name; } }; +void getDAGRequestFromStringWithRetry(tipb::DAGRequest & req, const String & s); } // namespace DB diff --git a/dbms/src/Flash/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp index beba38ff3be..f492a3e5085 100644 --- a/dbms/src/Flash/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -60,7 +60,7 @@ grpc::Status CoprocessorHandler::execute() SCOPE_EXIT({ GET_METRIC(cop_context.metrics, tiflash_coprocessor_handling_request_count, type_cop_dag).Decrement(); }); tipb::DAGRequest dag_request; - dag_request.ParseFromString(cop_request->data()); + getDAGRequestFromStringWithRetry(dag_request, cop_request->data()); LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling DAG request: " << dag_request.DebugString()); if (dag_request.has_is_rpn_expr() && dag_request.is_rpn_expr()) throw TiFlashException( diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp index fc2354890b8..49e78acae18 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.cpp +++ b/dbms/src/Flash/Mpp/MPPHandler.cpp @@ -106,23 +106,7 @@ std::vector MPPTask::prepare(const mpp::DispatchTaskRequest & task_r { auto start_time = Clock::now(); dag_req = std::make_unique(); - if (!dag_req->ParseFromString(task_request.encoded_plan())) - { - /// ParseFromString will use the default recursion limit, which is 100 to decode the plan, if the plan tree is too deep, - /// it may exceed this limit, so just try again by double the recursion limit - ::google::protobuf::io::CodedInputStream coded_input_stream( - reinterpret_cast(task_request.encoded_plan().data()), task_request.encoded_plan().size()); - coded_input_stream.SetRecursionLimit(::google::protobuf::io::CodedInputStream::GetDefaultRecursionLimit() * 2); - if (!dag_req->ParseFromCodedStream(&coded_input_stream)) - { - /// just return error if decode failed this time, because it's really a corner case, and even if we can decode the plan - /// successfully by using a very large value of the recursion limit, it is kinds of meaningless because the runtime - /// performance of this task may be very bad if the plan tree is too deep - throw TiFlashException( - std::string(__PRETTY_FUNCTION__) + ": Invalid encoded plan, the most likely is that the plan tree is too deep", - Errors::Coprocessor::BadRequest); - } - } + getDAGRequestFromStringWithRetry(*dag_req, task_request.encoded_plan()); RegionInfoMap regions; RegionInfoList retry_regions; for (auto & r : task_request.regions()) @@ -160,7 +144,20 @@ std::vector MPPTask::prepare(const mpp::DispatchTaskRequest & task_r } context.getTimezoneInfo().resetByDAGRequest(*dag_req); - dag_context = std::make_unique(*dag_req, task_request.meta()); + bool is_root_mpp_task = false; + const auto & exchange_sender = dag_req->root_executor().exchange_sender(); + if (exchange_sender.encoded_task_meta_size() == 1) + { + /// root mpp task always has 1 task_meta because there is only one TiDB + /// node for each mpp query + mpp::TaskMeta task_meta; + if (!task_meta.ParseFromString(exchange_sender.encoded_task_meta(0))) + { + throw TiFlashException("Failed to decode task meta info in ExchangeSender", Errors::Coprocessor::BadRequest); + } + is_root_mpp_task = task_meta.task_id() == -1; + } + dag_context = std::make_unique(*dag_req, task_request.meta(), is_root_mpp_task); context.setDAGContext(dag_context.get()); // register task. diff --git a/tests/fullstack-test/expr/expr_tree_too_deep.test b/tests/fullstack-test/expr/expr_tree_too_deep.test new file mode 100644 index 00000000000..979810a4f94 --- /dev/null +++ b/tests/fullstack-test/expr/expr_tree_too_deep.test @@ -0,0 +1,23 @@ +mysql> drop table if exists test.t +mysql> create table test.t(id int, value int) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values(1, -1) + +func> wait_table test t + +# test cop +mysql> set tidb_isolation_read_engines='tiflash,tidb'; set tidb_allow_mpp = 0; select * from test.t where case when value < 100 then case when value < 99 then case when value < 98 then case when value < 97 then case when value < 96 then case when value < 95 then case when value < 94 then case when value < 93 then case when value < 92 then case when value < 91 then case when value < 90 then case when value < 89 then case when value < 88 then case when value < 87 then case when value < 86 then case when value < 85 then case when value < 84 then case when value < 83 then case when value < 82 then case when value < 81 then case when value < 80 then case when value < 79 then case when value < 78 then case when value < 77 then case when value < 76 then case when value < 75 then case when value < 74 then case when value < 73 then case when value < 72 then case when value < 71 then case when value < 70 then case when value < 69 then case when value < 68 then case when value < 67 then case when value < 66 then case when value < 65 then case when value < 64 then case when value < 63 then case when value < 62 then case when value < 61 then case when value < 60 then case when value < 59 then case when value < 58 then case when value < 57 then case when value < 56 then case when value < 55 then case when value < 54 then case when value < 53 then case when value < 52 then case when value < 51 then case when value < 50 then case when value < 49 then case when value < 48 then case when value < 47 then case when value < 46 then case when value < 45 then case when value < 44 then case when value < 43 then case when value < 42 then case when value < 41 then case when value < 40 then case when value < 39 then case when value < 38 then case when value < 37 then case when value < 36 then case when value < 35 then case when value < 34 then case when value < 33 then case when value < 32 then case when value < 31 then case when value < 30 then case when value < 29 then case when value < 28 then case when value < 27 then case when value < 26 then case when value < 29 then case when value < 24 then case when value < 23 then case when value < 22 then case when value < 21 then case when value < 20 then case when value < 19 then case when value < 18 then case when value < 17 then case when value < 16 then case when value < 15 then case when value < 14 then case when value < 13 then case when value < 12 then case when value < 11 then case when value < 10 then case when value < 9 then case when value < 8 then case when value < 7 then case when value < 6 then case when value < 5 then case when value < 4 then case when value < 3 then case when value < 2 then case when value < 1 then case when value < 0 then 1 end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end ++------+-------+ +| id | value | ++------+-------+ +| 1 | -1 | ++------+-------+ + +# test batch cop +mysql> set tidb_allow_batch_cop=2; set tidb_isolation_read_engines='tiflash,tidb'; set tidb_allow_mpp = 0; select * from test.t where case when value < 100 then case when value < 99 then case when value < 98 then case when value < 97 then case when value < 96 then case when value < 95 then case when value < 94 then case when value < 93 then case when value < 92 then case when value < 91 then case when value < 90 then case when value < 89 then case when value < 88 then case when value < 87 then case when value < 86 then case when value < 85 then case when value < 84 then case when value < 83 then case when value < 82 then case when value < 81 then case when value < 80 then case when value < 79 then case when value < 78 then case when value < 77 then case when value < 76 then case when value < 75 then case when value < 74 then case when value < 73 then case when value < 72 then case when value < 71 then case when value < 70 then case when value < 69 then case when value < 68 then case when value < 67 then case when value < 66 then case when value < 65 then case when value < 64 then case when value < 63 then case when value < 62 then case when value < 61 then case when value < 60 then case when value < 59 then case when value < 58 then case when value < 57 then case when value < 56 then case when value < 55 then case when value < 54 then case when value < 53 then case when value < 52 then case when value < 51 then case when value < 50 then case when value < 49 then case when value < 48 then case when value < 47 then case when value < 46 then case when value < 45 then case when value < 44 then case when value < 43 then case when value < 42 then case when value < 41 then case when value < 40 then case when value < 39 then case when value < 38 then case when value < 37 then case when value < 36 then case when value < 35 then case when value < 34 then case when value < 33 then case when value < 32 then case when value < 31 then case when value < 30 then case when value < 29 then case when value < 28 then case when value < 27 then case when value < 26 then case when value < 29 then case when value < 24 then case when value < 23 then case when value < 22 then case when value < 21 then case when value < 20 then case when value < 19 then case when value < 18 then case when value < 17 then case when value < 16 then case when value < 15 then case when value < 14 then case when value < 13 then case when value < 12 then case when value < 11 then case when value < 10 then case when value < 9 then case when value < 8 then case when value < 7 then case when value < 6 then case when value < 5 then case when value < 4 then case when value < 3 then case when value < 2 then case when value < 1 then case when value < 0 then 1 end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end end ++------+-------+ +| id | value | ++------+-------+ +| 1 | -1 | ++------+-------+ +mysql> drop table if exists test.t From d8adc93496f3b7d83c2e32bb78737f05ed0a21d2 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 14:40:46 +0800 Subject: [PATCH 12/21] Fix the inconsistent behavior of CastStringAsDecimal between tiflash and tidb/tikv. (#3621) (#3675) --- dbms/src/Functions/FunctionsTiDBConversion.h | 2 +- .../expr/cast_string_as_decimal.test | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 tests/fullstack-test/expr/cast_string_as_decimal.test diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index b8a0d3b4c1b..b41c105566d 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -1005,9 +1005,9 @@ struct TiDBConvertToDecimal size_t pos = 0; if (decimal_parts.int_part.data[pos] == '+' || decimal_parts.int_part.data[pos] == '-') { - pos++; if (decimal_parts.int_part.data[pos] == '-') is_negative = true; + pos++; } Int256 max_value = DecimalMaxValue::Get(prec); diff --git a/tests/fullstack-test/expr/cast_string_as_decimal.test b/tests/fullstack-test/expr/cast_string_as_decimal.test new file mode 100644 index 00000000000..13d908ee6a5 --- /dev/null +++ b/tests/fullstack-test/expr/cast_string_as_decimal.test @@ -0,0 +1,22 @@ +mysql> drop table if exists test.t +mysql> create table test.t(a char(10)) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values('-123') +mysql> insert into test.t values('2006-01') + +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; select cast(a as decimal) from test.t; ++--------------------+ +| cast(a as decimal) | ++--------------------+ +| -123 | +| 2006 | ++--------------------+ +mysql> set @@tidb_isolation_read_engines='tikv'; select cast(a as decimal) from test.t; ++--------------------+ +| cast(a as decimal) | ++--------------------+ +| -123 | +| 2006 | ++--------------------+ \ No newline at end of file From 485fe78bb79189aa56f80c828bab4affa06174c2 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 15:18:45 +0800 Subject: [PATCH 13/21] Fix the bug that results of `where ` is wrong because it will be converted to int type. (#3463) (#3478) --- .../Flash/Coprocessor/DAGExpressionAnalyzer.cpp | 7 ++++--- tests/fullstack-test/expr/issue_3447.test | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 tests/fullstack-test/expr/issue_3447.test diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index a4f46ef813b..397ebe9e3c3 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -609,7 +609,7 @@ String DAGExpressionAnalyzer::convertToUInt8(ExpressionActionsPtr & actions, con // the basic rule is: // 1. if the column is only null, just return it is fine // 2. if the column is numeric, compare it with 0 - // 3. if the column is string, convert it to numeric column, and compare with 0 + // 3. if the column is string, convert it to float-point column, and compare with 0 // 4. if the column is date/datetime, compare it with zeroDate // 5. if the column is other type, throw exception if (actions->getSampleBlock().getByName(column_name).type->onlyNull()) @@ -629,9 +629,10 @@ String DAGExpressionAnalyzer::convertToUInt8(ExpressionActionsPtr & actions, con /// use tidb_cast to make it compatible with TiDB tipb::FieldType field_type; // TODO: Use TypeDouble as return type, to be compatible with TiDB - field_type.set_tp(TiDB::TypeLongLong); + field_type.set_tp(TiDB::TypeDouble); + field_type.set_flen(-1); tipb::Expr type_expr; - constructStringLiteralTiExpr(type_expr, "Nullable(Int64)"); + constructStringLiteralTiExpr(type_expr, "Nullable(Double)"); auto type_expr_name = getActions(type_expr, actions); String num_col_name = buildCastFunctionInternal(this, {column_name, type_expr_name}, false, field_type, actions); diff --git a/tests/fullstack-test/expr/issue_3447.test b/tests/fullstack-test/expr/issue_3447.test new file mode 100644 index 00000000000..53d4bbfb544 --- /dev/null +++ b/tests/fullstack-test/expr/issue_3447.test @@ -0,0 +1,15 @@ +mysql> drop table if exists test.t; +mysql> create table test.t(a char(5)); +mysql> alter table test.t set tiflash replica 1; +mysql> insert into test.t values ('0.1'), ('-0.1'), ('0.0'), ('-1'), ('a0.1'), ('0x01'); + +func> wait_table test t +mysql> select /*+ read_from_storage(tiflash[t]) */ * from test.t where a; ++------+ +| a | ++------+ +| 0.1 | +| -0.1 | +| -1 | +| 0x01 | ++------+ From 9c998047c047731dbbe88476132d54918bb305a8 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 16:14:45 +0800 Subject: [PATCH 14/21] Fix throw not constants exception in substringUTF8 (#3259) (#3265) --- dbms/src/Functions/FunctionsString.cpp | 2 +- tests/fullstack-test/expr/substring_utf8.test | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 3f68cf645c0..3aa11813a59 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -1248,7 +1248,7 @@ class FunctionSubstringUTF8 : public IFunction } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { diff --git a/tests/fullstack-test/expr/substring_utf8.test b/tests/fullstack-test/expr/substring_utf8.test index 624553ff033..cea9ec5e009 100644 --- a/tests/fullstack-test/expr/substring_utf8.test +++ b/tests/fullstack-test/expr/substring_utf8.test @@ -20,4 +20,9 @@ mysql> set session tidb_isolation_read_engines='tiflash'; set tidb_allow_mpp=0; count(*) 2 +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select substring(t2.a, 1, 3) as a from ( select 1 a from test.t ) t2 +a +1 +1 + mysql> drop table if exists test.t From 999237f841ba1bbe5734ad6944b6a683739dd63d Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 17:12:46 +0800 Subject: [PATCH 15/21] expression: fix the issue that comparison between Decimal may cause overflow and report `Can't compare`. (#3097) (#3364) --- dbms/src/Core/DecimalComparison.h | 95 ++++----- dbms/src/DataTypes/DataTypeDecimal.h | 57 +++--- .../include/common/arithmeticOverflow.h | 182 ++++++++++-------- tests/fullstack-test/expr/concat.test | 25 ++- .../fullstack-test/expr/decimal_compare.test | 23 +++ 5 files changed, 212 insertions(+), 170 deletions(-) create mode 100644 tests/fullstack-test/expr/decimal_compare.test diff --git a/dbms/src/Core/DecimalComparison.h b/dbms/src/Core/DecimalComparison.h index ec67585203e..21f3cb79c10 100644 --- a/dbms/src/Core/DecimalComparison.h +++ b/dbms/src/Core/DecimalComparison.h @@ -92,6 +92,53 @@ class DecimalComparison return applyWithScale(a, b, shift); } + template + static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]]) + { + CompareInt x = static_cast(a); + CompareInt y = static_cast(b); + + if constexpr (_check_overflow) + { + bool invalid = false; + + if constexpr (sizeof(A) > sizeof(CompareInt)) + invalid |= (A(x) != a); + if constexpr (sizeof(B) > sizeof(CompareInt)) + invalid |= (B(y) != b); + if constexpr (std::is_unsigned_v) + invalid |= (x < 0); + if constexpr (std::is_unsigned_v) + invalid |= (y < 0); + + if (invalid) + throw Exception("Can't compare", ErrorCodes::DECIMAL_OVERFLOW); + } + + if constexpr (scale_left && scale_right) + throw DB::Exception("Assumption broken: there should only one side need to be multiplied in decimal comparison.", ErrorCodes::LOGICAL_ERROR); + if constexpr (!scale_left && !scale_right) + return Op::apply(x, y); + + // overflow means absolute value must be greater. + // we use this variable to mark whether the right side is greater than left side by overflow. + int right_side_greater_by_overflow = 0; + if constexpr (scale_left) + { + int sign = boost::math::sign(x); + right_side_greater_by_overflow = -sign * common::mulOverflow(x, scale, x); // x will be changed. + } + if constexpr (scale_right) + { + int sign = boost::math::sign(y); + right_side_greater_by_overflow = sign * common::mulOverflow(y, scale, y); // y will be changed. + } + + if (right_side_greater_by_overflow) + return Op::apply(0, right_side_greater_by_overflow); + return Op::apply(x, y); + } + private: struct Shift @@ -240,53 +287,7 @@ class DecimalComparison } template - static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]]) - { - CompareInt x = static_cast(a); - CompareInt y = static_cast(b); - - if constexpr (_check_overflow) - { - bool overflow = false; - - if constexpr (sizeof(A) > sizeof(CompareInt)) - overflow |= (A(x) != a); - if constexpr (sizeof(B) > sizeof(CompareInt)) - overflow |= (B(y) != b); - if constexpr (std::is_unsigned_v) - overflow |= (x < 0); - if constexpr (std::is_unsigned_v) - overflow |= (y < 0); - - if constexpr (scale_left) { - if constexpr (std::is_same_v) - x = x * scale; - else - overflow |= common::mulOverflow(x, scale, x); - } - if constexpr (scale_right) { - if constexpr (std::is_same_v) - y = y * scale; - else - overflow |= common::mulOverflow(y, scale, y); - } - if (overflow) - throw Exception("Can't compare", ErrorCodes::DECIMAL_OVERFLOW); - } - else - { - if constexpr (scale_left) - x *= scale; - if constexpr (scale_right) - y *= scale; - } - - return Op::apply(x, y); - } - - template - static void NO_INLINE vector_vector(const ArrayA & a, const ArrayB & b, PaddedPODArray & c, - CompareInt scale [[maybe_unused]]) + static void NO_INLINE vector_vector(const ArrayA & a, const ArrayB & b, PaddedPODArray & c, CompareInt scale [[maybe_unused]]) { size_t size = a.size(); const A * a_pos = a.data(); diff --git a/dbms/src/DataTypes/DataTypeDecimal.h b/dbms/src/DataTypes/DataTypeDecimal.h index 45518e0023c..af39982ae38 100644 --- a/dbms/src/DataTypes/DataTypeDecimal.h +++ b/dbms/src/DataTypes/DataTypeDecimal.h @@ -9,7 +9,6 @@ namespace DB { - namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; @@ -43,9 +42,13 @@ class DataTypeDecimal : public IDataType static constexpr size_t maxPrecision() { return maxDecimalPrecision(); } // If scale is omitted, the default is 0. If precision is omitted, the default is 10. - DataTypeDecimal() : DataTypeDecimal(10, 0) {} + DataTypeDecimal() + : DataTypeDecimal(10, 0) + {} - DataTypeDecimal(size_t precision_, size_t scale_) : precision(precision_), scale(scale_) + DataTypeDecimal(size_t precision_, size_t scale_) + : precision(precision_) + , scale(scale_) { if (precision > decimal_max_prec || scale > precision || scale > decimal_max_scale) { @@ -129,7 +132,7 @@ class DataTypeDecimal : public IDataType template typename T::NativeType scaleFactorFor(const DataTypeDecimal & x) const { - if (scale < x.getScale()) + if (getScale() < x.getScale()) { return 1; } @@ -164,8 +167,8 @@ inline DataTypePtr createDecimal(UInt64 prec, UInt64 scale) if (static_cast(scale) > prec) throw Exception("Negative scales and scales larger than precision are not supported. precision:" + DB::toString(prec) - + ", scale:" + DB::toString(scale), - ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + ", scale:" + DB::toString(scale), + ErrorCodes::ARGUMENT_OUT_OF_BOUND); if (prec <= maxDecimalPrecision()) { @@ -195,7 +198,8 @@ inline bool IsDecimalDataType(const DataTypePtr & type) } template typename std::enable_if_t<(sizeof(T) >= sizeof(U)), const DataTypeDecimal> decimalResultType( - const DataTypeDecimal & tx, const DataTypeDecimal & ty) + const DataTypeDecimal & tx, + const DataTypeDecimal & ty) { UInt32 scale = (tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale()); return DataTypeDecimal(maxDecimalPrecision(), scale); @@ -203,7 +207,8 @@ typename std::enable_if_t<(sizeof(T) >= sizeof(U)), const DataTypeDecimal> de template typename std::enable_if_t<(sizeof(T) < sizeof(U)), const DataTypeDecimal> decimalResultType( - const DataTypeDecimal & tx, const DataTypeDecimal & ty) + const DataTypeDecimal & tx, + const DataTypeDecimal & ty) { UInt32 scale = (tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale()); return DataTypeDecimal(maxDecimalPrecision(), scale); @@ -226,24 +231,24 @@ inline UInt32 leastDecimalPrecisionFor(TypeIndex int_type) { switch (int_type) { - case TypeIndex::Int8: - [[fallthrough]]; - case TypeIndex::UInt8: - return 3; - case TypeIndex::Int16: - [[fallthrough]]; - case TypeIndex::UInt16: - return 5; - case TypeIndex::Int32: - [[fallthrough]]; - case TypeIndex::UInt32: - return 10; - case TypeIndex::Int64: - return 19; - case TypeIndex::UInt64: - return 20; - default: - break; + case TypeIndex::Int8: + [[fallthrough]]; + case TypeIndex::UInt8: + return 3; + case TypeIndex::Int16: + [[fallthrough]]; + case TypeIndex::UInt16: + return 5; + case TypeIndex::Int32: + [[fallthrough]]; + case TypeIndex::UInt32: + return 10; + case TypeIndex::Int64: + return 19; + case TypeIndex::UInt64: + return 20; + default: + break; } return 0; } diff --git a/libs/libcommon/include/common/arithmeticOverflow.h b/libs/libcommon/include/common/arithmeticOverflow.h index 38e47ccda57..ba067c92d74 100644 --- a/libs/libcommon/include/common/arithmeticOverflow.h +++ b/libs/libcommon/include/common/arithmeticOverflow.h @@ -2,105 +2,119 @@ namespace common { - template - inline bool addOverflow(T x, T y, T & res) - { - return __builtin_add_overflow(x, y, &res); - } +template +inline bool addOverflow(T x, T y, T & res) +{ + return __builtin_add_overflow(x, y, &res); +} - template <> - inline bool addOverflow(int x, int y, int & res) - { - return __builtin_sadd_overflow(x, y, &res); - } +template <> +inline bool addOverflow(int x, int y, int & res) +{ + return __builtin_sadd_overflow(x, y, &res); +} - template <> - inline bool addOverflow(long x, long y, long & res) - { - return __builtin_saddl_overflow(x, y, &res); - } +template <> +inline bool addOverflow(long x, long y, long & res) +{ + return __builtin_saddl_overflow(x, y, &res); +} - template <> - inline bool addOverflow(long long x, long long y, long long & res) - { - return __builtin_saddll_overflow(x, y, &res); - } +template <> +inline bool addOverflow(long long x, long long y, long long & res) +{ + return __builtin_saddll_overflow(x, y, &res); +} - template <> - inline bool addOverflow(__int128 x, __int128 y, __int128 & res) - { - static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; - static constexpr __int128 max_int128 = (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; - res = x + y; - return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y); - } +template <> +inline bool addOverflow(__int128 x, __int128 y, __int128 & res) +{ + static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; + static constexpr __int128 max_int128 = (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; + res = x + y; + return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y); +} - template - inline bool subOverflow(T x, T y, T & res) - { - return __builtin_sub_overflow(x, y, &res); - } +template +inline bool subOverflow(T x, T y, T & res) +{ + return __builtin_sub_overflow(x, y, &res); +} - template <> - inline bool subOverflow(int x, int y, int & res) - { - return __builtin_ssub_overflow(x, y, &res); - } +template <> +inline bool subOverflow(int x, int y, int & res) +{ + return __builtin_ssub_overflow(x, y, &res); +} - template <> - inline bool subOverflow(long x, long y, long & res) - { - return __builtin_ssubl_overflow(x, y, &res); - } +template <> +inline bool subOverflow(long x, long y, long & res) +{ + return __builtin_ssubl_overflow(x, y, &res); +} - template <> - inline bool subOverflow(long long x, long long y, long long & res) - { - return __builtin_ssubll_overflow(x, y, &res); - } +template <> +inline bool subOverflow(long long x, long long y, long long & res) +{ + return __builtin_ssubll_overflow(x, y, &res); +} - template <> - inline bool subOverflow(__int128 x, __int128 y, __int128 & res) - { - static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; - static constexpr __int128 max_int128 = (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; - res = x - y; - return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); - } +template <> +inline bool subOverflow(__int128 x, __int128 y, __int128 & res) +{ + static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; + static constexpr __int128 max_int128 = (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; + res = x - y; + return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); +} - template - inline bool mulOverflow(T x, T y, T & res) - { - return __builtin_mul_overflow(x, y, &res); - } +template +inline bool mulOverflow(T x, T y, T & res) +{ + return __builtin_mul_overflow(x, y, &res); +} - template <> - inline bool mulOverflow(int x, int y, int & res) - { - return __builtin_smul_overflow(x, y, &res); - } +template <> +inline bool mulOverflow(int x, int y, int & res) +{ + return __builtin_smul_overflow(x, y, &res); +} - template <> - inline bool mulOverflow(long x, long y, long & res) - { - return __builtin_smull_overflow(x, y, &res); - } +template <> +inline bool mulOverflow(long x, long y, long & res) +{ + return __builtin_smull_overflow(x, y, &res); +} + +template <> +inline bool mulOverflow(long long x, long long y, long long & res) +{ + return __builtin_smulll_overflow(x, y, &res); +} - template <> - inline bool mulOverflow(long long x, long long y, long long & res) +template <> +inline bool mulOverflow(__int128 x, __int128 y, __int128 & res) +{ + res = static_cast(x) * static_cast(y); /// Avoid signed integer overflow. + if (!x || !y) + return false; + + unsigned __int128 a = (x > 0) ? x : -x; + unsigned __int128 b = (y > 0) ? y : -y; + return (a * b) / b != a; +} + +template <> +inline bool mulOverflow(DB::Int256 x, DB::Int256 y, DB::Int256 & res) +{ + try { - return __builtin_smulll_overflow(x, y, &res); + res = x * y; } - - template <> - inline bool mulOverflow(__int128 x, __int128 y, __int128 & res) + catch (std::overflow_error &) { - res = static_cast(x) * static_cast(y); /// Avoid signed integer overflow. - if (!x || !y) - return false; - - unsigned __int128 a = (x > 0) ? x : -x; - unsigned __int128 b = (y > 0) ? y : -y; - return (a * b) / b != a; + return true; } + return false; } +} // namespace common diff --git a/tests/fullstack-test/expr/concat.test b/tests/fullstack-test/expr/concat.test index 2cfb69eacba..36836d183d1 100644 --- a/tests/fullstack-test/expr/concat.test +++ b/tests/fullstack-test/expr/concat.test @@ -1,16 +1,15 @@ -# known issue, wait for first-row fix. -#mysql> drop table if exists test.t -#mysql> create table test.t(a char(5), b char(5)) -#mysql> alter table test.t set tiflash replica 1 -#mysql> insert into test.t values(null, 'y') -#mysql> insert into test.t values('x', null) -#func> wait_table test t -#mysql> select concat(a, b), count(*) from test.t group by 1; -#+-----------------+----------+ -#| concat(a, b) | count(*) | -#+-----------------+----------+ -#| NULL | 2 | -#+-----------------+----------+ +mysql> drop table if exists test.t +mysql> create table test.t(a char(5), b char(5)) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values(null, 'y') +mysql> insert into test.t values('x', null) +func> wait_table test t +mysql> select concat(a, b), count(*) from test.t group by 1; ++-----------------+----------+ +| concat(a, b) | count(*) | ++-----------------+----------+ +| NULL | 2 | ++-----------------+----------+ mysql> drop table if exists test.t mysql> create table test.t(a int, b char(10), c varchar(10)); diff --git a/tests/fullstack-test/expr/decimal_compare.test b/tests/fullstack-test/expr/decimal_compare.test new file mode 100644 index 00000000000..c854dad6c03 --- /dev/null +++ b/tests/fullstack-test/expr/decimal_compare.test @@ -0,0 +1,23 @@ +mysql> drop table if exists test.t +mysql> create table test.t(a decimal(9,5)) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values (3); +func> wait_table test t +mysql> set tidb_enforce_mpp=1; select 1/2 drop table if exists test.s +mysql> CREATE TABLE test.s(a decimal(65,0), b decimal(30,30)); +mysql> insert into test.s values(12345678911234567891123456789112345678911234567891123456789112345, 0.1); +mysql> alter table test.s set tiflash replica 1; +func> wait_table test s +mysql> set tidb_enforce_mpp=1; SELECT a Date: Wed, 26 Jan 2022 18:06:45 +0800 Subject: [PATCH 16/21] fix throw `Unexpected type of column: Nullable(Nothing)` in LogicalOp (#3371) (#3377) --- dbms/src/Functions/FunctionsLogical.h | 69 +++++++++++++++-------- tests/fullstack-test/expr/logical_op.test | 44 +++++++++++++++ 2 files changed, 88 insertions(+), 25 deletions(-) diff --git a/dbms/src/Functions/FunctionsLogical.h b/dbms/src/Functions/FunctionsLogical.h index 4d8d7b71548..5a03bbcf03d 100644 --- a/dbms/src/Functions/FunctionsLogical.h +++ b/dbms/src/Functions/FunctionsLogical.h @@ -258,7 +258,7 @@ class FunctionAnyArityLogical : public IFunction } template - bool convertTypeToUInt8(const IColumn * column, UInt8Container & res, UInt8Container & res_not_null) + bool convertTypeToUInt8(const IColumn * column, UInt8Container & res, UInt8Container & res_not_null) const { auto col = checkAndGetColumn>(column); if (!col) @@ -275,9 +275,28 @@ class FunctionAnyArityLogical : public IFunction return true; } + bool convertOnlyNullToUInt8(const IColumn * column, UInt8Container & res, UInt8Container & res_not_null, UInt8Container & input_has_null) const + { + if (!column->onlyNull()) + return false; + + size_t n = res.size(); + for (size_t i = 0; i < n; ++i) + { + res[i] = false; + if constexpr (special_impl_for_nulls) + { + res_not_null[i] |= Impl::resNotNull(res[i], true); + input_has_null[i] |= true; + } + } + + return true; + } + template bool convertNullableTypeToUInt8(const IColumn * column, UInt8Container & res, UInt8Container & res_not_null, - UInt8Container & input_has_null) + UInt8Container & input_has_null) const { auto col_nullable = checkAndGetColumn(column); @@ -301,29 +320,29 @@ class FunctionAnyArityLogical : public IFunction return true; } - - void convertToUInt8(const IColumn * column, UInt8Container & res, UInt8Container & res_not_null, - UInt8Container & input_has_null) - { - if (!convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertTypeToUInt8(column, res, res_not_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) && - !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null)) + + void convertToUInt8(const IColumn * column, UInt8Container & res, UInt8Container & res_not_null, UInt8Container & input_has_null) const + { + if (!convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertTypeToUInt8(column, res, res_not_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertNullableTypeToUInt8(column, res, res_not_null, input_has_null) + && !convertOnlyNullToUInt8(column, res, res_not_null, input_has_null)) throw Exception("Unexpected type of column: " + column->getName(), ErrorCodes::ILLEGAL_COLUMN); } diff --git a/tests/fullstack-test/expr/logical_op.test b/tests/fullstack-test/expr/logical_op.test index 7d6b8bd2be2..b64b4ff35e3 100644 --- a/tests/fullstack-test/expr/logical_op.test +++ b/tests/fullstack-test/expr/logical_op.test @@ -1,14 +1,19 @@ mysql> drop table if exists test.t1; mysql> drop table if exists test.t2; +mysql> drop table if exists test.t3; mysql> create table test.t1(a char(20),b double); mysql> create table test.t2(a char(20)); +mysql> create table test.t3(a int); mysql> insert into test.t1 values(1,null),('j',0),(1,12.991),(0,0),(0,0),('they',1.009),('can',-99),(0,12.991),(1,-9.183),(null,1); mysql> insert into test.t2 values(0),(0),(0),(0),(0),(0),(1),('with'),('see'),(null); +mysql> insert into test.t3 values(0),(1); mysql> alter table test.t1 set tiflash replica 1; mysql> alter table test.t2 set tiflash replica 1; +mysql> alter table test.t3 set tiflash replica 1; func> wait_table test t1 func> wait_table test t2 +func> wait_table test t3 mysql> set session tidb_isolation_read_engines='tiflash'; select count(*) from test.t1 where (b between null and 100) is null; +----------+ @@ -24,5 +29,44 @@ mysql> set session tidb_allow_mpp=1; set session tidb_isolation_read_engines='ti | 10 | +----------+ +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select null and a > 0, a from test.t3; ++----------------+------+ +| null and a > 0 | a | ++----------------+------+ +| 0 | 0 | +| NULL | 1 | ++----------------+------+ + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select null or a > 0, a from test.t3; ++---------------+------+ +| null or a > 0 | a | ++---------------+------+ +| NULL | 0 | +| 1 | 1 | ++---------------+------+ + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select null xor a > 0, a from test.t3; ++----------------+------+ +| null xor a > 0 | a | ++----------------+------+ +| NULL | 0 | +| NULL | 1 | ++----------------+------+ + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select !null, a from test.t3; ++-------+------+ +| !null | a | ++-------+------+ +| NULL | 0 | +| NULL | 1 | ++-------+------+ + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select count(*) from test.t3 group by a having min(null) and a > 0; +# empty + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select count(*) from test.t3 group by a having ifnull(null,count(*)) and min(null); +# empty + #mysql> drop table if exists test.t1; #mysql> drop table if exists test.t2; +#mysql> drop table if exists test.t3; From 79df41b8bfc4eb8b9e106502edeace294a63c893 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 20:34:45 +0800 Subject: [PATCH 17/21] expression: Fix the bug that castStringAsReal has different behaivor between tiflash and tikv/tidb. (#3681) (#3755) --- dbms/src/Functions/FunctionsTiDBConversion.h | 2 ++ tests/fullstack-test/expr/cast_string_as_real.test | 12 +++++++++--- tests/fullstack-test/expr/issue_3447.test | 1 - 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index b41c105566d..027c16f361a 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -664,6 +664,8 @@ struct TiDBConvertToFloat context.getDAGContext()->handleTruncateError("Truncated incorrect DOUBLE value"); return 0.0; } + if (float_string.size < trim_string.size()) + trim_string[float_string.size] = '\0'; Float64 f = strtod(float_string.data, nullptr); if (f == std::numeric_limits::infinity()) { diff --git a/tests/fullstack-test/expr/cast_string_as_real.test b/tests/fullstack-test/expr/cast_string_as_real.test index ab8f2852589..c81cfa2a604 100644 --- a/tests/fullstack-test/expr/cast_string_as_real.test +++ b/tests/fullstack-test/expr/cast_string_as_real.test @@ -1,29 +1,35 @@ mysql> drop table if exists test.t mysql> create table test.t(a char(30)) mysql> alter table test.t set tiflash replica 1 -mysql> insert into test.t values ('1.23'),('123'),('-123.99'),('+123.123-'),(0),(0.0),(NULL),('1.11.00'),('11xx'),('11.xx'),('xx.11'),('1e649'),('-1e649'),('9.9999999999999999'),('9.999999999999999') +mysql> insert into test.t values ('1.23'),('123'),('-123.99'),('+123.123-'),(0),(0.0),(NULL),('1.11.00'),('11xx'),('11.xx'),('xx.11'),('1e649'),('-1e649'),('9.9999999999999999'),('9.999999999999999'); +mysql> insert into test.t values ('0x01'),('-0x01'),('1x01'),('-1x01.2'),('0x01.2'),('x1'); func> wait_table test t -mysql> set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, b from (select a, cast(a as double) as b from test.t) t group by a, b order by a +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, cast(a as double) b from test.t order by a; +--------------------+-------------------------+ | a | b | +--------------------+-------------------------+ | NULL | NULL | | +123.123- | 123.123 | +| -0x01 | -0 | | -123.99 | -123.99 | | -1e649 | -1.7976931348623157e308 | +| -1x01.2 | -1 | | 0 | 0 | | 0.0 | 0 | +| 0x01 | 0 | +| 0x01.2 | 0 | | 1.11.00 | 1.11 | | 1.23 | 1.23 | | 11.xx | 11 | | 11xx | 11 | | 123 | 123 | | 1e649 | 1.7976931348623157e308 | +| 1x01 | 1 | | 9.999999999999999 | 9.999999999999998 | | 9.9999999999999999 | 10 | +| x1 | 0 | | xx.11 | 0 | +--------------------+-------------------------+ -mysql> drop table if exists test.t diff --git a/tests/fullstack-test/expr/issue_3447.test b/tests/fullstack-test/expr/issue_3447.test index 53d4bbfb544..e0321e8c60e 100644 --- a/tests/fullstack-test/expr/issue_3447.test +++ b/tests/fullstack-test/expr/issue_3447.test @@ -11,5 +11,4 @@ mysql> select /*+ read_from_storage(tiflash[t]) */ * from test.t where a; | 0.1 | | -0.1 | | -1 | -| 0x01 | +------+ From 6351388c68692e684842bc88970a93a63337f308 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 21:16:45 +0800 Subject: [PATCH 18/21] cherry pick PR#3283 about sending schemas in exchangeSender (#3318) (#3370) --- contrib/tipb | 2 +- dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp | 11 +++ .../fullstack-test-dt/union_push_down.test | 96 +++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 tests/tidb-ci/fullstack-test-dt/union_push_down.test diff --git a/contrib/tipb b/contrib/tipb index 79a378b6d1c..e1c7362eeeb 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit 79a378b6d1c4968c388d93886362a9bbcb79a285 +Subproject commit e1c7362eeeb418cdcb481de42af05f6cc9649093 diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp index 16dfc1696bd..e18eb2d872d 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp @@ -222,6 +222,17 @@ DAGQueryBlock::DAGQueryBlock(UInt32 id_, const ::google::protobuf::RepeatedPtrFi void DAGQueryBlock::fillOutputFieldTypes() { + /// the top block has exchangeSender, which decides the output fields, keeping the same with exchangeReceiver + if (exchangeSender != nullptr && exchangeSender->has_exchange_sender() && !exchangeSender->exchange_sender().all_field_types().empty()) + { + output_field_types.clear(); + for (auto & field_type : exchangeSender->exchange_sender().all_field_types()) + { + output_field_types.push_back(field_type); + } + return; + } + /// the non-top block if (!output_field_types.empty()) { return; diff --git a/tests/tidb-ci/fullstack-test-dt/union_push_down.test b/tests/tidb-ci/fullstack-test-dt/union_push_down.test new file mode 100644 index 00000000000..30ea7576c7e --- /dev/null +++ b/tests/tidb-ci/fullstack-test-dt/union_push_down.test @@ -0,0 +1,96 @@ +# the null flag should be aligned for each sub select clause under union, this test is related to the tidb PR https://github.com/pingcap/tidb/pull/28990 + +mysql> drop table if exists test.t; +mysql> drop table if exists test.tt; +mysql> create table test.t (id int, d double, nd double not null); +mysql> alter table test.t set tiflash replica 1; +mysql> insert into test.t values(0,0,0),(1,1,1),(2,null,2); +mysql> analyze table test.t; +mysql> create table test.tt like test.t; +mysql> alter table test.tt set tiflash replica 1; +mysql> insert into test.tt select * from test.t; +mysql> insert into test.tt select * from test.t; +mysql> insert into test.tt select * from test.t; +mysql> insert into test.tt select * from test.t; +mysql> analyze table test.tt; + +func> wait_table test t +func> wait_table test tt + +mysql> use test; set @@tidb_allow_mpp=1; set @@tidb_enforce_mpp=1; Select DD,NDD,IDD from tt join ( (select d as DD, nd as NDD, id as IDD from t) union all (select d as DD, 0 as NDD, id as IDD from t) union all (select d as DD, nd as NDD, 0 as IDD from t) ) u on tt.id = u.IDD; ++------+------+------+ +| DD | NDD | IDD | ++------+------+------+ +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| NULL | 2 | 0 | +| 1 | 1 | 0 | +| 1 | 1 | 1 | +| 1 | 0 | 1 | +| NULL | 2 | 2 | +| NULL | 0 | 2 | +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| NULL | 2 | 0 | +| 1 | 1 | 0 | +| 1 | 1 | 1 | +| 1 | 0 | 1 | +| NULL | 2 | 2 | +| NULL | 0 | 2 | +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| NULL | 2 | 0 | +| 1 | 1 | 0 | +| 1 | 1 | 1 | +| 1 | 0 | 1 | +| NULL | 2 | 2 | +| NULL | 0 | 2 | +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| 0 | 0 | 0 | +| NULL | 2 | 0 | +| 1 | 1 | 0 | +| 1 | 1 | 1 | +| 1 | 0 | 1 | +| NULL | 2 | 2 | +| NULL | 0 | 2 | ++------+------+------+ + +mysql> use test; set @@tidb_allow_mpp=1; set @@tidb_enforce_mpp=1; Select IDD from tt join ( (select 127 as IDD from t) union all (select id as IDD from t) ) u on tt.id = u.IDD; ++------+ +| IDD | ++------+ +| 0 | +| 1 | +| 2 | +| 0 | +| 1 | +| 2 | +| 0 | +| 1 | +| 2 | +| 0 | +| 1 | +| 2 | ++------+ + +mysql> use test; set @@tidb_allow_mpp=1; set @@tidb_enforce_mpp=1; Select IDD from tt join ( (select 127 as IDD from t) union all (select 1 as IDD from t) ) u on tt.id = u.IDD; ++------+ +| IDD | ++------+ +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | +| 1 | ++------+ From 2748f10c48feb255a61e48b48fd708a4338ec5f5 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 21:58:44 +0800 Subject: [PATCH 19/21] Add unittests for str_to_date, fix #3556, #3557 (#3581) (#3934) --- dbms/src/Common/MyTime.cpp | 71 ++++++++----------- dbms/src/Common/tests/gtest_mytime.cpp | 89 ++++++++++++++++++++++++ dbms/src/Functions/FunctionsConversion.h | 17 ++++- 3 files changed, 133 insertions(+), 44 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 1436ad08342..4e9aacf5dbb 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -72,7 +72,7 @@ std::vector parseDateFormat(String format) { format = Poco::trimInPlace(format); - if (format.size() == 0) + if (format.empty()) return {}; if (!std::isdigit(format[0]) || !std::isdigit(format[format.size() - 1])) @@ -530,7 +530,7 @@ Field parseMyDateTime(const String & str, int8_t fsp) { // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), // therefore we could safely skip this branch. - if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == "")) + if (!noAbsorb(seps) && !(!tz_minute.empty() && tz_sep.empty())) { // we can't absorb timezone if there is no separate between tz_hour and tz_minute if (!tz_hour.empty()) @@ -852,9 +852,8 @@ size_t maxFormattedDateTimeStringLength(const String & format) { size_t result = 0; bool in_pattern_match = false; - for (size_t i = 0; i < format.size(); i++) + for (char x : format) { - char x = format[i]; if (in_pattern_match) { switch (x) @@ -969,7 +968,6 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const { throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue); } - return; } bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & day, const UInt64 & hour, const UInt64 & minute, @@ -990,9 +988,8 @@ bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & MyDateTimeFormatter::MyDateTimeFormatter(const String & layout) { bool in_pattern_match = false; - for (size_t i = 0; i < layout.size(); i++) + for (char x : layout) { - char x = layout[i]; if (in_pattern_match) { switch (x) @@ -1227,7 +1224,9 @@ struct MyDateTimeParser::Context // The pos we are parsing from size_t pos = 0; - Context(StringRef view_) : view(std::move(view_)) {} + explicit Context(StringRef view_) + : view(std::move(view_)) + {} }; // Try to parse digits with number of `limit` starting from view[pos] @@ -1270,18 +1269,18 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) { // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure size_t temp_pos = ctx.pos; - auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + auto check_if_end = [&temp_pos, &ctx]() -> ParseState { // To the end if (temp_pos == ctx.view.size) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; - return checkIfEnd(); + return check_if_end(); }; - auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { + auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" @@ -1290,7 +1289,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += 1; // move forward return ParseState::NORMAL; }; - auto tryParse = [&]() -> ParseState { + auto try_parse = [&]() -> ParseState { ParseState state = ParseState::NORMAL; /// Note that we should update `time` as soon as possible, or we /// can not get correct result for incomplete input like "12:13" @@ -1311,7 +1310,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.hour = hour; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t minute = 0; @@ -1323,7 +1322,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.minute = minute; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t second = 0; @@ -1362,7 +1361,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += 2; // move forward return ParseState::NORMAL; }; - if (auto state = tryParse(); state == ParseState::FAIL) + if (auto state = try_parse(); state == ParseState::FAIL) return false; // Other state, forward the `ctx.pos` and return true ctx.pos = temp_pos; @@ -1374,18 +1373,18 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) { // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure size_t temp_pos = ctx.pos; - auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + auto check_if_end = [&temp_pos, &ctx]() -> ParseState { // To the end if (temp_pos == ctx.view.size) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; - return checkIfEnd(); + return check_if_end(); }; - auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { + auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" @@ -1394,7 +1393,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += 1; // move forward return ParseState::NORMAL; }; - auto tryParse = [&]() -> ParseState { + auto try_parse = [&]() -> ParseState { ParseState state = ParseState::NORMAL; /// Note that we should update `time` as soon as possible, or we /// can not get correct result for incomplete input like "12:13" @@ -1411,7 +1410,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.hour = hour; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t minute = 0; @@ -1423,7 +1422,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.minute = minute; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t second = 0; @@ -1437,7 +1436,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::NORMAL; }; - if (auto state = tryParse(); state == ParseState::FAIL) + if (auto state = try_parse(); state == ParseState::FAIL) return false; // Other state, forward the `ctx.pos` and return true ctx.pos = temp_pos; @@ -1482,6 +1481,9 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) }); break; } + case 'm': + //"%m": Month, numeric (00..12) + [[fallthrough]]; case 'c': { //"%c": Month, numeric (0..12) @@ -1523,9 +1525,9 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) time.micro_second = 0; return true; } - // The siffix '0' can be ignored. + // The suffix '0' can be ignored. // "9" means 900000 - while (ms > 0 && ms * 10 < 1000000) + for (size_t i = step; i < 6; i++) { ms *= 10; } @@ -1621,19 +1623,6 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) }); break; } - case 'm': - { - //"%m": Month, numeric (00..12) - parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { - auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2); - if (step == 0 || month > 12) - return false; - time.month = month; - ctx.pos += step; - return true; - }); - break; - } case 'S': //"%S": Seconds (00..59) [[fallthrough]]; @@ -1880,7 +1869,7 @@ std::optional MyDateTimeParser::parseAsPackedUInt(const StringRef & str_ MyDateTimeParser::Context ctx(str_view); // TODO: can we return warnings to TiDB? - for (auto & f : parsers) + for (const auto & f : parsers) { // Ignore all prefix white spaces before each pattern match (TODO: handle unicode space?) while (ctx.pos < str_view.size && isWhitespaceASCII(str_view.data[ctx.pos])) @@ -1889,7 +1878,7 @@ std::optional MyDateTimeParser::parseAsPackedUInt(const StringRef & str_ if (ctx.pos == ctx.view.size) break; - if (f(ctx, my_time) != true) + if (!f(ctx, my_time)) { #ifndef NDEBUG LOG_TRACE(&Logger::get("MyDateTimeParser"), diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 31647d0787c..376ec0bf534 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -393,7 +393,96 @@ try // {"Tuesday 52 2001", "%W %V %Y", std::nullopt}, // // {"Tuesday 52 2001", "%W %V %x", std::nullopt}, // // {"Tuesday 52 2001", "%W %u %x", std::nullopt}, // + + // Test cases for %b + {"10/JAN/2010", "%d/%b/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Right spill, case-insensitive + {"10/FeB/2010", "%d/%b/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}}, + {"10/MAr/2010", "%d/%b/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}}, + {"10/ApR/2010", "%d/%b/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}}, + {"10/mAY/2010", "%d/%b/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}}, + {"10/JuN/2010", "%d/%b/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}}, + {"10/JUL/2010", "%d/%b/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}}, + {"10/Aug/2010", "%d/%b/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}}, + {"10/seP/2010", "%d/%b/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}}, + {"10/Oct/2010", "%d/%b/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}}, + {"10/NOV/2010", "%d/%b/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}}, + {"10/DEC/2010", "%d/%b/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}}, + {"10/January/2010", "%d/%b/%Y", std::nullopt}, // Test full spilling + + // Test cases for %M + {"10/January/2010", "%d/%M/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Test full spilling + {"10/February/2010", "%d/%M/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}}, + {"10/March/2010", "%d/%M/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}}, + {"10/April/2010", "%d/%M/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}}, + {"10/May/2010", "%d/%M/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}}, + {"10/June/2010", "%d/%M/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}}, + {"10/July/2010", "%d/%M/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}}, + {"10/August/2010", "%d/%M/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}}, + {"10/September/2010", "%d/%M/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}}, + {"10/October/2010", "%d/%M/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}}, + {"10/November/2010", "%d/%M/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}}, + {"10/December/2010", "%d/%M/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}}, + + // Test cases for %c + // {"10/0/2010", "%d/%c/%Y", MyDateTime{2010, 0, 10, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE + {"10/1/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, + {"10/01/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, + {"10/001/2010", "%d/%c/%Y", std::nullopt}, + {"10/13/2010", "%d/%c/%Y", std::nullopt}, + {"10/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}}, + + // Test cases for %d, %e + // {"0/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 0, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE + {"1/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 1, 0, 0, 0, 0}}, + {"05/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}}, + {"05/12/2010", "%e/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}}, + {"31/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 31, 0, 0, 0, 0}}, + {"32/12/2010", "%d/%c/%Y", std::nullopt}, + {"30/11/2010", "%d/%c/%Y", MyDateTime{2010, 11, 30, 0, 0, 0, 0}}, + {"31/11/2010", "%e/%c/%Y", MyDateTime{2010, 11, 31, 0, 0, 0, 0}}, + {"28/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 28, 0, 0, 0, 0}}, + {"29/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 29, 0, 0, 0, 0}}, + {"29/2/2020", "%e/%c/%Y", MyDateTime{2020, 2, 29, 0, 0, 0, 0}}, + + // Test cases for %Y + // {"1/12/0000", "%d/%c/%Y", MyDateTime{0000, 12, 1, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE + {"1/12/01", "%d/%c/%Y", MyDateTime{2001, 12, 1, 0, 0, 0, 0}}, + {"1/12/0001", "%d/%c/%Y", MyDateTime{0001, 12, 1, 0, 0, 0, 0}}, + {"1/12/2020", "%d/%c/%Y", MyDateTime{2020, 12, 1, 0, 0, 0, 0}}, + {"1/12/9999", "%d/%c/%Y", MyDateTime{9999, 12, 1, 0, 0, 0, 0}}, + + // Test cases for %f + {"01,5,2013 999999", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}}, + {"01,5,2013 0", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + {"01,5,2013 9999990000000", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}}, + {"01,5,2013 1", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 100000}}, + {"01,5,2013 1230", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 123000}}, + {"01,5,2013 01", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 10000}}, // issue 3556 + + // Test cases for %h, %I, %l + {"00:11:12 ", "%h:%i:%S ", std::nullopt}, // 0 is not a valid number of %h + {"01:11:12 ", "%I:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}}, + {"12:11:12 ", "%l:%i:%S ", MyDateTime{0, 0, 0, 0, 11, 12, 0}}, + + // Test cases for %k, %H + {"00:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 11, 12, 0}}, + {"01:11:12 ", "%k:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}}, + {"12:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 12, 11, 12, 0}}, + {"24:11:12 ", "%k:%i:%S ", std::nullopt}, + + // Test cases for %i + {"00:00:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 00, 12, 0}}, + {"00:01:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 01, 12, 0}}, + {"00:59:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 12, 0}}, + {"00:60:12 ", "%H:%i:%S ", std::nullopt}, + + // Test cases for %s, %S + {"00:00:00 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 00, 00, 0}}, + {"00:01:01 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 01, 01, 0}}, + {"00:59:59 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 59, 0}}, + {"00:59:60 ", "%H:%i:%S ", std::nullopt}, }; + auto result_formatter = MyDateTimeFormatter("%Y/%m/%d %T.%f"); size_t idx = 0; for (const auto & [input, fmt, expected] : cases) diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 3e7abeb2e55..3c1840b86d2 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1652,7 +1652,7 @@ class FunctionStrToDate : public IFunction const ColumnString * input_raw_col = nullptr; if (input_column->isColumnNullable()) { - auto null_input_column = checkAndGetColumn(input_column.get()); + const auto * null_input_column = checkAndGetColumn(input_column.get()); input_raw_col = checkAndGetColumn(null_input_column->getNestedColumnPtr().get()); } else @@ -1702,7 +1702,7 @@ class FunctionStrToDate : public IFunction const ColumnString * format_raw_col = nullptr; if (format_column->isColumnNullable()) { - auto null_format_column = checkAndGetColumn(format_column.get()); + const auto * null_format_column = checkAndGetColumn(format_column.get()); format_raw_col = checkAndGetColumn(null_format_column->getNestedColumnPtr().get()); } else @@ -1710,6 +1710,14 @@ class FunctionStrToDate : public IFunction format_raw_col = checkAndGetColumn(format_column.get()); } + String str_input_const; + StringRef str_ref; + if (input_column->isColumnConst()) + { + const auto & input_const = checkAndGetColumnConst(input_column.get()); + str_input_const = input_const->getValue(); + str_ref = StringRef(str_input_const); + } for (size_t i = 0; i < num_rows; ++i) { // Set null for either null input or null format @@ -1734,7 +1742,10 @@ class FunctionStrToDate : public IFunction const auto format_ref = format_raw_col->getDataAt(i); auto parser = MyDateTimeParser(format_ref.toString()); - const auto str_ref = input_raw_col->getDataAt(i); + if (!input_column->isColumnConst()) + { + str_ref = input_raw_col->getDataAt(i); + } if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res) { datetime_res[i] = *parse_res; From 48a2afc780a7430b4ccca306f4abddca0d361583 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 27 Jan 2022 11:33:12 +0800 Subject: [PATCH 20/21] Fix cast to decimal overflow bug (#3922) (#3942) --- dbms/src/Functions/FunctionsTiDBConversion.h | 24 ++++++++++++------- dbms/src/Storages/Page/PageUtil.cpp | 2 +- .../fullstack-test/expr/cast_as_decimal.test | 17 +++++++++++++ 3 files changed, 33 insertions(+), 10 deletions(-) create mode 100644 tests/fullstack-test/expr/cast_as_decimal.test diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index 027c16f361a..0a7b847ccba 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -792,21 +792,27 @@ struct TiDBConvertToDecimal using FromFieldType = typename FromDataType::FieldType; template - static U toTiDBDecimalInternal(T value, PrecType prec, ScaleType scale, const Context & context) + static U toTiDBDecimalInternal(T int_value, PrecType prec, ScaleType scale, const Context & context) { + // int_value is the value that exposes to user. Such as cast(val to decimal), val is the int_value which used by user. + // And val * scale_mul is the scaled_value, which is stored in ColumnDecimal internally. + static_assert(std::is_integral_v); using UType = typename U::NativeType; - auto maxValue = DecimalMaxValue::Get(prec); - if (value > maxValue || value < -maxValue) + UType scale_mul = getScaleMultiplier(scale); + + Int256 scaled_value = static_cast(int_value) * static_cast(scale_mul); + Int256 scaled_max_value = DecimalMaxValue::Get(prec); + + if (scaled_value > scaled_max_value || scaled_value < -scaled_max_value) { context.getDAGContext()->handleOverflowError("cast to decimal", Errors::Types::Truncated); - if (value > 0) - return static_cast(maxValue); + if (int_value > 0) + return static_cast(scaled_max_value); else - return static_cast(-maxValue); + return static_cast(-scaled_max_value); } - UType scale_mul = getScaleMultiplier(scale); - U result = static_cast(value) * scale_mul; - return result; + + return static_cast(scaled_value); } template diff --git a/dbms/src/Storages/Page/PageUtil.cpp b/dbms/src/Storages/Page/PageUtil.cpp index 1a7ee25714f..ab89ce7296c 100644 --- a/dbms/src/Storages/Page/PageUtil.cpp +++ b/dbms/src/Storages/Page/PageUtil.cpp @@ -55,7 +55,7 @@ void syncFile(WritableFilePtr & file) #ifndef NDEBUG void writeFile( - WritableFilePtr & file, UInt64 offset, char * data, size_t to_write, const RateLimiterPtr & rate_limiter, bool enable_failpoint) + WritableFilePtr & file, UInt64 offset, char * data, size_t to_write, const RateLimiterPtr & rate_limiter, bool enable_failpoint [[ maybe_unused ]]) #else void writeFile(WritableFilePtr & file, UInt64 offset, char * data, size_t to_write, const RateLimiterPtr & rate_limiter) #endif diff --git a/tests/fullstack-test/expr/cast_as_decimal.test b/tests/fullstack-test/expr/cast_as_decimal.test new file mode 100644 index 00000000000..b244323015c --- /dev/null +++ b/tests/fullstack-test/expr/cast_as_decimal.test @@ -0,0 +1,17 @@ +mysql> drop table if exists test.t1; +mysql> create table test.t1(c1 int); +mysql> insert into test.t1 values(9999), (-9999), (99), (-99); +mysql> alter table test.t1 set tiflash replica 1; +func> wait_table test t1 +mysql> set @@tidb_isolation_read_engines='tiflash'; select cast(c1 as decimal(4, 1)) from test.t1 order by 1; +cast(c1 as decimal(4, 1)) +-999.9 +-99.0 +99.0 +999.9 +mysql> set @@tidb_isolation_read_engines='tiflash'; select cast(c1 as decimal(2, 2)) from test.t1 order by 1; +cast(c1 as decimal(2, 2)) +-0.99 +-0.99 + 0.99 + 0.99 From 18e3df0adc72bad288661bfb33371c6450ea7766 Mon Sep 17 00:00:00 2001 From: ti-srebot <66930949+ti-srebot@users.noreply.github.com> Date: Fri, 28 Jan 2022 10:31:12 +0800 Subject: [PATCH 21/21] expand streams after agg (#2530) (#2537) --- .../Flash/Coprocessor/DAGQueryBlockInterpreter.cpp | 13 ++++++++++++- .../Flash/Coprocessor/DAGQueryBlockInterpreter.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 2891d3f772b..f35850c1412 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -895,12 +895,12 @@ void DAGQueryBlockInterpreter::executeAggregation(DAGPipeline & pipeline, const /// If there are several sources, then we perform parallel aggregation if (pipeline.streams.size() > 1) { + before_agg_streams = pipeline.streams.size(); BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams); pipeline.firstStream() = std::make_shared(pipeline.streams, stream_with_non_joined_data, params, context.getFileProvider(), true, max_streams, settings.aggregation_memory_efficient_merge_threads ? static_cast(settings.aggregation_memory_efficient_merge_threads) : static_cast(settings.max_threads)); - pipeline.streams.resize(1); } else @@ -1471,6 +1471,17 @@ BlockInputStreams DAGQueryBlockInterpreter::execute() } } + /// expand concurrency after agg + if(!query_block.isRootQueryBlock() && before_agg_streams > 1 && pipeline.streams.size()==1) + { + size_t concurrency = before_agg_streams; + BlockInputStreamPtr shared_query_block_input_stream + = std::make_shared(concurrency * 5, pipeline.firstStream()); + pipeline.streams.clear(); + for (size_t i = 0; i < concurrency; i++) + pipeline.streams.push_back(std::make_shared(shared_query_block_input_stream)); + } + return pipeline.streams; } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 449a5471c2d..d172af5f3fd 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -148,6 +148,9 @@ class DAGQueryBlockInterpreter /// How many streams we ask for storage to produce, and in how many threads we will do further processing. size_t max_streams = 1; + /// How many streams before aggregation + size_t before_agg_streams = 1; + /// Table from where to read data, if not subquery. ManageableStoragePtr storage; TableLockHolder table_drop_lock;