diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h index ea889cdb2dd..42a98f795fd 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.h +++ b/dbms/src/Core/ColumnWithTypeAndName.h @@ -48,6 +48,9 @@ struct ColumnWithTypeAndName : ColumnWithTypeAndName(type_->createColumn(), type_, name_) {} + ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_) + : ColumnWithTypeAndName(column_, type_, "") + {} // If we use `Field default_value_` as the param and init default_value with `std::move(default_value_)`, it brings compile warnings. // Check PR#1383 for more details ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_, String name_, Int64 column_id_ = 0, const Field & default_value_ = Field()) diff --git a/dbms/src/TestUtils/ColumnGenerator.cpp b/dbms/src/TestUtils/ColumnGenerator.cpp new file mode 100644 index 00000000000..30d2570de90 --- /dev/null +++ b/dbms/src/TestUtils/ColumnGenerator.cpp @@ -0,0 +1,193 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// NOTE(review): the include target is missing on the next line — restore the header name.
+#include
+
+namespace DB::tests
+{
+/// Builds a column holding `opts.size` generated values of the type named by `opts.type_name`.
+///
+/// `opts.type_name` is either the literal "Decimal" (a decimal type with generated precision and
+/// scale is created) or a name handed to DataTypeFactory. `opts.string_max_size` is the inclusive
+/// upper bound for the length of generated strings. `opts.distribution` is not read here.
+///
+/// Returns the filled column together with its data type.
+/// Throws std::invalid_argument when the resolved type is not one of the integer, float, string,
+/// decimal, MyDate or MyDateTime types handled by the switch below.
+ColumnWithTypeAndName ColumnGenerator::generate(const ColumnGeneratorOpts & opts)
+{
+    // The string-length distribution is a member, so it is re-created for every request.
+    int_rand_gen = std::uniform_int_distribution<size_t>(0, opts.string_max_size);
+    DataTypePtr type;
+    if (opts.type_name == "Decimal")
+        type = createDecimalType();
+    else
+        type = DataTypeFactory::instance().get(opts.type_name);
+
+    auto col = type->createColumn();
+    col->reserve(opts.size);
+
+    auto type_id = type->getTypeId();
+
+    switch (type_id)
+    {
+    case TypeIndex::UInt8:
+    case TypeIndex::UInt16:
+    case TypeIndex::UInt32:
+    case TypeIndex::UInt64:
+        for (size_t i = 0; i < opts.size; ++i)
+            genUInt(col);
+        break;
+    case TypeIndex::Int8:
+    case TypeIndex::Int16:
+    case TypeIndex::Int32:
+    case TypeIndex::Int64:
+        for (size_t i = 0; i < opts.size; ++i)
+            genInt(col);
+        break;
+    case TypeIndex::Float32:
+    case TypeIndex::Float64:
+        for (size_t i = 0; i < opts.size; ++i)
+            genFloat(col);
+        break;
+    case TypeIndex::String:
+        for (size_t i = 0; i < opts.size; ++i)
+            genString(col);
+        break;
+    case TypeIndex::Decimal32:
+    case TypeIndex::Decimal64:
+    case TypeIndex::Decimal128:
+    case TypeIndex::Decimal256:
+        for (size_t i = 0; i < opts.size; ++i)
+            genDecimal(col, type);
+        break;
+    case TypeIndex::MyDate:
+        for (size_t i = 0; i < opts.size; ++i)
+            genDate(col);
+        break;
+    case TypeIndex::MyDateTime:
+        for (size_t i = 0; i < opts.size; ++i)
+            genDateTime(col);
+        break;
+    default:
+        throw std::invalid_argument("RandomColumnGenerator invalid type");
+    }
+
+    return {std::move(col), type};
+}
+
+/// Creates a decimal type with a generated precision in [1, max_precision] and a generated
+/// scale in [0, precision - 1].
+DataTypePtr ColumnGenerator::createDecimalType()
+{
+    // Number of decimal digits of the largest 64-bit unsigned value, i.e. the longest digit
+    // string that std::to_string(rand_gen()) can produce in randomDecimal().
+    static const int max_precision = std::to_string(std::numeric_limits<uint64_t>::max()).size();
+    int prec = rand_gen() % max_precision + 1;
+    int scale = rand_gen() % prec;
+    return DB::createDecimal(prec, scale);
+}
+
+/// Returns a string whose length is drawn from `int_rand_gen` and whose bytes are picked from
+/// `charset` by byte index.
+///
+/// NOTE(review): `charset` contains non-ASCII characters; if it is UTF-8 encoded, picking single
+/// bytes can split multi-byte sequences and yield strings that are not valid UTF-8. Confirm that
+/// this is acceptable for the tests that consume these strings.
+String ColumnGenerator::randomString()
+{
+    String str(int_rand_gen(rand_gen), 0);
+    std::generate_n(str.begin(), str.size(), [this]() { return charset[rand_gen() % charset.size()]; });
+    return str;
+}
+
+int ColumnGenerator::randomTimeOffset() +{ + static constexpr int max_offset = 24 * 3600 * 10000; // 10000 days for test + return (rand_gen() % max_offset) * (rand_gen() % 2 == 0 ? 1 : -1); +} + +time_t ColumnGenerator::randomUTCTimestamp() +{ + return ::time(nullptr) + randomTimeOffset(); +} + +struct tm ColumnGenerator::randomLocalTime() +{ + time_t t = randomUTCTimestamp(); + struct tm res + { + }; + + if (localtime_r(&t, &res) == nullptr) + { + throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", t, strerror(errno))); + } + return res; +} + +String ColumnGenerator::randomDate() +{ + auto res = randomLocalTime(); + return fmt::format("{}-{}-{}", res.tm_year + 1900, res.tm_mon + 1, res.tm_mday); +} + +String ColumnGenerator::randomDateTime() +{ + auto res = randomLocalTime(); + return fmt::format("{}-{}-{} {}:{}:{}", res.tm_year + 1900, res.tm_mon + 1, res.tm_mday, res.tm_hour, res.tm_min, res.tm_sec); +} + +String ColumnGenerator::randomDecimal(uint64_t prec, uint64_t scale) +{ + auto s = std::to_string(rand_gen()); + if (s.size() < prec) + s += String(prec - s.size(), '0'); + else if (s.size() > prec) + s = s.substr(0, prec); + return s.substr(0, prec - scale) + "." 
+ s.substr(prec - scale); +} + +void ColumnGenerator::genInt(MutableColumnPtr & col) +{ + Field f = static_cast(rand_gen()); + col->insert(f); +} + +void ColumnGenerator::genUInt(MutableColumnPtr & col) +{ + Field f = static_cast(rand_gen()); + col->insert(f); +} + +void ColumnGenerator::genFloat(MutableColumnPtr & col) +{ + Field f = static_cast(real_rand_gen(rand_gen)); + col->insert(f); +} + +void ColumnGenerator::genString(MutableColumnPtr & col) +{ + Field f = randomString(); + col->insert(f); +} + +void ColumnGenerator::genDate(MutableColumnPtr & col) +{ + Field f = parseMyDateTime(randomDate()); + col->insert(f); +} + +void ColumnGenerator::genDateTime(MutableColumnPtr & col) +{ + Field f = parseMyDateTime(randomDateTime()); + col->insert(f); +} + +void ColumnGenerator::genDecimal(MutableColumnPtr & col, DataTypePtr & data_type) +{ + auto prec = getDecimalPrecision(*data_type, 0); + auto scale = getDecimalScale(*data_type, 0); + auto s = randomDecimal(prec, scale); + bool negative = rand_gen() % 2 == 0; + Field f; + if (parseDecimal(s.data(), s.size(), negative, f)) + { + col->insert(f); + } + else + { + throw std::invalid_argument(fmt::format("RandomColumnGenerator parseDecimal({}, {}) prec {} scale {} fail", s, negative, prec, scale)); + } +} +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/ColumnGenerator.h b/dbms/src/TestUtils/ColumnGenerator.h new file mode 100644 index 00000000000..1a8f6d624e2 --- /dev/null +++ b/dbms/src/TestUtils/ColumnGenerator.h @@ -0,0 +1,68 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +#include +#include + +namespace DB::tests +{ +enum DataDistribution +{ + RANDOM, + // TODO support zipf and more distribution. +}; + +struct ColumnGeneratorOpts +{ + size_t size; + String type_name; + DataDistribution distribution; + size_t string_max_size = 128; +}; + +class ColumnGenerator : public ext::Singleton +{ +public: + ColumnWithTypeAndName generate(const ColumnGeneratorOpts & opts); + +private: + std::mt19937_64 rand_gen; + std::uniform_int_distribution int_rand_gen = std::uniform_int_distribution(0, 128); + std::uniform_real_distribution real_rand_gen; + const std::string charset{"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()、|【】[]{}「」;::;'‘,<《.>》。?·~`~"}; + + String randomString(); + int randomTimeOffset(); + time_t randomUTCTimestamp(); + struct tm randomLocalTime(); + String randomDate(); + String randomDateTime(); + String randomDecimal(uint64_t prec, uint64_t scale); + + DataTypePtr createDecimalType(); + + void genInt(MutableColumnPtr & col); + void genUInt(MutableColumnPtr & col); + void genFloat(MutableColumnPtr & col); + void genString(MutableColumnPtr & col); + void genDate(MutableColumnPtr & col); + void genDateTime(MutableColumnPtr & col); + void genDecimal(MutableColumnPtr & col, DataTypePtr & data_type); +}; +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/tests/gtest_column_generator.cpp b/dbms/src/TestUtils/tests/gtest_column_generator.cpp new file mode 100644 index 00000000000..f6400aa226c --- /dev/null +++ 
b/dbms/src/TestUtils/tests/gtest_column_generator.cpp @@ -0,0 +1,37 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +TEST(TestColumnGenerator, run) +try +{ + std::vector type_vec = {"Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", "Float32", "Float64", "String", "MyDateTime", "MyDate", "Decimal"}; + for (size_t i = 10; i <= 100000; i *= 10) + { + for (auto type : type_vec) + ASSERT_EQ(ColumnGenerator::instance().generate({i, type, RANDOM}).column->size(), i); + } +} +CATCH + +} // namespace tests + +} // namespace DB