Skip to content

Commit

Permalink
Test: RandomGenColumns (#5743)
Browse files Browse the repository at this point in the history
ref #4609
  • Loading branch information
ywqzzy authored Sep 2, 2022
1 parent b4ddec8 commit ade4325
Show file tree
Hide file tree
Showing 4 changed files with 301 additions and 0 deletions.
3 changes: 3 additions & 0 deletions dbms/src/Core/ColumnWithTypeAndName.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ struct ColumnWithTypeAndName
: ColumnWithTypeAndName(type_->createColumn(), type_, name_)
{}

// Convenience overload for callers that have a column and its type but no name:
// delegates to the (column, type, name, ...) constructor with an empty name,
// leaving that constructor's remaining parameters at their defaults.
ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_)
: ColumnWithTypeAndName(column_, type_, "")
{}
// If we use `Field default_value_` as the param and init default_value with `std::move(default_value_)`, it brings compile warnings.
// Check PR#1383 for more details
ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_, String name_, Int64 column_id_ = 0, const Field & default_value_ = Field())
Expand Down
193 changes: 193 additions & 0 deletions dbms/src/TestUtils/ColumnGenerator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <TestUtils/ColumnGenerator.h>

namespace DB::tests
{
// Builds a column holding `opts.size` randomly generated values.
//
// The column type is looked up by `opts.type_name` through DataTypeFactory,
// except for the special name "Decimal", which yields a decimal type with a
// randomly chosen precision and scale.
//
// Returns the filled column together with its type.
// Throws std::invalid_argument when the resolved type is not one of the
// integer, float, string, decimal, MyDate or MyDateTime types handled below.
ColumnWithTypeAndName ColumnGenerator::generate(const ColumnGeneratorOpts & opts)
{
    // Random string lengths are drawn from [0, opts.string_max_size].
    int_rand_gen = std::uniform_int_distribution<Int64>(0, opts.string_max_size);

    DataTypePtr type;
    if (opts.type_name == "Decimal")
    {
        type = createDecimalType();
    }
    else
    {
        type = DataTypeFactory::instance().get(opts.type_name);
    }

    auto column = type->createColumn();
    column->reserve(opts.size);

    // Runs `append_one` once per requested row.
    const auto fill = [&opts](auto && append_one) {
        for (size_t row = 0; row < opts.size; ++row)
            append_one();
    };

    switch (type->getTypeId())
    {
    case TypeIndex::UInt8:
    case TypeIndex::UInt16:
    case TypeIndex::UInt32:
    case TypeIndex::UInt64:
        fill([&] { genUInt(column); });
        break;
    case TypeIndex::Int8:
    case TypeIndex::Int16:
    case TypeIndex::Int32:
    case TypeIndex::Int64:
        fill([&] { genInt(column); });
        break;
    case TypeIndex::Float32:
    case TypeIndex::Float64:
        fill([&] { genFloat(column); });
        break;
    case TypeIndex::String:
        fill([&] { genString(column); });
        break;
    case TypeIndex::Decimal32:
    case TypeIndex::Decimal64:
    case TypeIndex::Decimal128:
    case TypeIndex::Decimal256:
        fill([&] { genDecimal(column, type); });
        break;
    case TypeIndex::MyDate:
        fill([&] { genDate(column); });
        break;
    case TypeIndex::MyDateTime:
        fill([&] { genDateTime(column); });
        break;
    default:
        throw std::invalid_argument("RandomColumnGenerator invalid type");
    }

    return {std::move(column), type};
}

// Creates a decimal type with a random precision in [1, 20] and a random
// scale in [0, precision).
DataTypePtr ColumnGenerator::createDecimalType()
{
    // Number of decimal digits in the largest uint64_t value (19 + 1 == 20);
    // randomDecimal() takes its digits from a 64-bit random number.
    static constexpr int max_precision = std::numeric_limits<uint64_t>::digits10 + 1;

    const int precision = rand_gen() % max_precision + 1;
    const int scale = rand_gen() % precision;
    return DB::createDecimal(precision, scale);
}

// Returns a string whose length is drawn from `int_rand_gen` and whose bytes
// are each picked at random from `charset`.
// NOTE(review): bytes are chosen individually, so if `charset` holds multi-byte
// characters the result is not guaranteed to be well-formed text — confirm
// this is intended.
String ColumnGenerator::randomString()
{
    const auto length = int_rand_gen(rand_gen);
    String result(length, 0);
    for (auto & ch : result)
        ch = charset[rand_gen() % charset.size()];
    return result;
}

// Returns a random offset in seconds, uniformly signed, whose magnitude is
// strictly less than 10000 days.
int ColumnGenerator::randomTimeOffset()
{
    static constexpr int max_offset = 24 * 3600 * 10000; // 10000 days for test

    // Draw the magnitude and the sign in separate statements so the order of
    // the two random draws is fixed rather than left to the compiler.
    // The magnitude is < max_offset, so it always fits in an int.
    const int magnitude = static_cast<int>(rand_gen() % max_offset);
    const bool positive = rand_gen() % 2 == 0;

    // Negate in signed arithmetic instead of multiplying an unsigned 64-bit
    // value by -1 and relying on wraparound plus a narrowing conversion.
    return positive ? magnitude : -magnitude;
}

// Returns the current time shifted by a random offset from randomTimeOffset().
time_t ColumnGenerator::randomUTCTimestamp()
{
    const time_t now = ::time(nullptr);
    const int offset = randomTimeOffset();
    return now + offset;
}

// Converts a random timestamp into broken-down local time.
// Throws std::invalid_argument (carrying the errno text) if localtime_r fails.
struct tm ColumnGenerator::randomLocalTime()
{
    const time_t timestamp = randomUTCTimestamp();
    struct tm broken_down = {};

    if (localtime_r(&timestamp, &broken_down) != nullptr)
        return broken_down;

    throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", timestamp, strerror(errno)));
}

// Formats a random local date as "year-month-day" (fields are not zero-padded).
String ColumnGenerator::randomDate()
{
    const auto local = randomLocalTime();
    const int year = local.tm_year + 1900; // tm_year counts from 1900
    const int month = local.tm_mon + 1; // tm_mon is zero-based
    return fmt::format("{}-{}-{}", year, month, local.tm_mday);
}

// Formats a random local date-time as "year-month-day hour:minute:second"
// (fields are not zero-padded).
String ColumnGenerator::randomDateTime()
{
    const auto local = randomLocalTime();
    const int year = local.tm_year + 1900; // tm_year counts from 1900
    const int month = local.tm_mon + 1; // tm_mon is zero-based
    return fmt::format("{}-{}-{} {}:{}:{}", year, month, local.tm_mday, local.tm_hour, local.tm_min, local.tm_sec);
}

// Produces the text of an unsigned decimal with exactly `prec` digits, of
// which the last `scale` follow the decimal point.
//
// The digits come from a random 64-bit number: padded on the right with '0'
// when too short, cut on the right when too long. Callers are expected to
// pass scale <= prec.
String ColumnGenerator::randomDecimal(uint64_t prec, uint64_t scale)
{
    auto digits = std::to_string(rand_gen());
    // resize() both pads with '0' and truncates, giving exactly `prec` digits.
    digits.resize(prec, '0');

    const auto integer_len = prec - scale;
    return digits.substr(0, integer_len) + "." + digits.substr(integer_len);
}

// Appends one random value, reinterpreted as a signed 64-bit integer, to `col`.
void ColumnGenerator::genInt(MutableColumnPtr & col)
{
    const auto raw = rand_gen();
    const Field value = static_cast<Int64>(raw);
    col->insert(value);
}

// Appends one random unsigned 64-bit value to `col`.
void ColumnGenerator::genUInt(MutableColumnPtr & col)
{
    const auto raw = rand_gen();
    const Field value = static_cast<UInt64>(raw);
    col->insert(value);
}

// Appends one random floating-point value drawn from `real_rand_gen` to `col`.
void ColumnGenerator::genFloat(MutableColumnPtr & col)
{
    const auto sample = real_rand_gen(rand_gen);
    const Field value = static_cast<Float64>(sample);
    col->insert(value);
}

// Appends one random string (see randomString()) to `col`.
void ColumnGenerator::genString(MutableColumnPtr & col)
{
    const Field value = randomString();
    col->insert(value);
}

// Appends one random date to `col`: the textual date from randomDate() is
// parsed with parseMyDateTime() and the parsed value is inserted.
void ColumnGenerator::genDate(MutableColumnPtr & col)
{
    const Field value = parseMyDateTime(randomDate());
    col->insert(value);
}

// Appends one random date-time to `col`: the text from randomDateTime() is
// parsed with parseMyDateTime() and the parsed value is inserted.
void ColumnGenerator::genDateTime(MutableColumnPtr & col)
{
    const Field value = parseMyDateTime(randomDateTime());
    col->insert(value);
}

// Appends one random decimal value to `col`.
//
// The precision and scale are read from `data_type`, a matching digit string
// is generated, a sign is chosen at random, and the text is parsed into a Field.
// Throws std::invalid_argument if parseDecimal rejects the generated text.
void ColumnGenerator::genDecimal(MutableColumnPtr & col, DataTypePtr & data_type)
{
    const auto precision = getDecimalPrecision(*data_type, 0);
    const auto scale = getDecimalScale(*data_type, 0);
    auto text = randomDecimal(precision, scale);
    bool is_negative = rand_gen() % 2 == 0;

    Field parsed;
    if (!parseDecimal(text.data(), text.size(), is_negative, parsed))
    {
        throw std::invalid_argument(fmt::format("RandomColumnGenerator parseDecimal({}, {}) prec {} scale {} fail", text, is_negative, precision, scale));
    }
    col->insert(parsed);
}
} // namespace DB::tests
68 changes: 68 additions & 0 deletions dbms/src/TestUtils/ColumnGenerator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <Core/ColumnsWithTypeAndName.h>
#include <DataTypes/DataTypeDecimal.h>
#include <DataTypes/DataTypeFactory.h>

#include <ext/singleton.h>
#include <random>

namespace DB::tests
{
// Statistical distribution requested for generated column values.
enum DataDistribution
{
// Currently the only distribution defined.
RANDOM,
// TODO: support zipf and other distributions.
};

// Options for ColumnGenerator::generate().
// This is an aggregate: callers may brace-initialise it positionally, e.g. {size, type_name, RANDOM}.
struct ColumnGeneratorOpts
{
// Number of rows to generate.
size_t size;
// Name of the column type. "Decimal" requests a decimal type with random
// precision and scale; any other name is resolved through DataTypeFactory.
String type_name;
// Requested value distribution.
// NOTE(review): generate() in ColumnGenerator.cpp does not appear to read this field yet — confirm.
DataDistribution distribution;
// Inclusive upper bound on the length of generated strings.
size_t string_max_size = 128;
};

// Test utility that produces columns filled with randomly generated values.
// Accessed through the ext::Singleton interface, i.e. ColumnGenerator::instance().
// NOTE(review): generate() mutates the members below without any visible
// synchronisation — presumably intended for single-threaded test use; confirm.
class ColumnGenerator : public ext::Singleton<ColumnGenerator>
{
public:
// Generates a column described by `opts` and returns it together with its type.
ColumnWithTypeAndName generate(const ColumnGeneratorOpts & opts);

private:
// Source of randomness for every generator below. It is default-constructed
// here (no explicit seed is supplied in this class).
std::mt19937_64 rand_gen;
// Distribution of random string lengths; generate() replaces it with
// [0, opts.string_max_size] on every call.
std::uniform_int_distribution<Int64> int_rand_gen = std::uniform_int_distribution<Int64>(0, 128);
// Distribution used for floating-point values (default-constructed range).
std::uniform_real_distribution<double> real_rand_gen;
// Pool of bytes that random strings are built from.
const std::string charset{"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()、|【】[]{}「」;::;'‘,<《.>》。?·~`~"};

// Helpers producing raw random values or their textual form.
String randomString();
int randomTimeOffset();
time_t randomUTCTimestamp();
struct tm randomLocalTime();
String randomDate();
String randomDateTime();
String randomDecimal(uint64_t prec, uint64_t scale);

// Creates a decimal data type with random precision and scale.
DataTypePtr createDecimalType();

// Each gen* helper appends exactly one random value of the given kind to `col`.
void genInt(MutableColumnPtr & col);
void genUInt(MutableColumnPtr & col);
void genFloat(MutableColumnPtr & col);
void genString(MutableColumnPtr & col);
void genDate(MutableColumnPtr & col);
void genDateTime(MutableColumnPtr & col);
void genDecimal(MutableColumnPtr & col, DataTypePtr & data_type);
};
} // namespace DB::tests
37 changes: 37 additions & 0 deletions dbms/src/TestUtils/tests/gtest_column_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <TestUtils/ColumnGenerator.h>
#include <TestUtils/TiFlashTestBasic.h>

namespace DB
{
namespace tests
{

// Smoke test: for every supported type name and for row counts
// 10, 100, ..., 100000, the generated column must contain exactly the
// requested number of rows.
TEST(TestColumnGenerator, run)
try
{
    const std::vector<String> type_names = {"Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", "Float32", "Float64", "String", "MyDateTime", "MyDate", "Decimal"};
    for (size_t rows = 10; rows <= 100000; rows *= 10)
    {
        // Bind by const reference so the type name is not copied on every iteration.
        for (const auto & type_name : type_names)
            ASSERT_EQ(ColumnGenerator::instance().generate({rows, type_name, RANDOM}).column->size(), rows);
    }
}
CATCH

} // namespace tests

} // namespace DB

0 comments on commit ade4325

Please sign in to comment.