Skip to content

Commit

Permalink
Add test for partition types
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Feb 9, 2024
1 parent d1d4f1e commit 61b1431
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 20 deletions.
4 changes: 3 additions & 1 deletion velox/connectors/hive/HiveConnectorSplit.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ struct HiveConnectorSplit : public connector::ConnectorSplit {
/// Mapping from partition keys to values. Values are specified as strings
/// formatted the same way as CAST(x as VARCHAR). Null values are specified as
/// std::nullopt. Date values must be formatted using ISO 8601 as YYYY-MM-DD.
/// All scalar types and date type are supported.
/// Decimal values must be formatted using unscaled values, e.g. '123456' for
/// '1234.56' of decimal(6, 2) type. All scalar types and date type are
/// supported.
const std::unordered_map<std::string, std::optional<std::string>>
partitionKeys;
std::optional<int32_t> tableBucketNumber;
Expand Down
104 changes: 85 additions & 19 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ using namespace facebook::velox::common::test;
using namespace facebook::velox::exec::test;

namespace {
// Builds a SQL expression of the form CAST('<value>' AS <type>) for the given
// variant. The value text comes from variant::toJson(type); for DATE types that
// JSON text is additionally parsed and unwrapped to its plain string form
// before being placed between the single quotes.
std::string makeCastSql(const variant& v, const TypePtr& type) {
  std::string literal = v.toJson(type);
  if (type->isDate()) {
    // Unwrap the JSON string to obtain the bare date text.
    literal = folly::parseJson(literal).asString();
  }

  std::ostringstream sql;
  sql << "CAST('" << literal << "' AS ";
  // Appends the SQL spelling of 'type' to the stream.
  toTypeSql(type, sql);
  sql << ")";
  return sql.str();
}

void verifyCacheStats(
const FileHandleCacheStats& cacheStats,
size_t curSize,
Expand Down Expand Up @@ -148,10 +159,25 @@ class TableScanTest : public virtual HiveConnectorTestBase {
void testPartitionedTableImpl(
const std::string& filePath,
const TypePtr& partitionType,
const std::optional<std::string>& partitionValue) {
auto split = HiveConnectorSplitBuilder(filePath)
.partitionKey("pkey", partitionValue)
.build();
const variant& partitionValue) {
// Create the partition value of a split.
std::optional<std::string> value = std::nullopt;
if (!partitionValue.isNull()) {
auto type = partitionType;
if (partitionType->isDecimal()) {
const auto [precision, scale] =
getDecimalPrecisionScale(*partitionType);
// The partition value of decimal should be formatted with unscaled
// value.
type = DECIMAL(precision, 0);
}
value = std::optional<std::string>(
partitionType->isDate()
? folly::parseJson(partitionValue.toJson(type)).asString()
: partitionValue.toJson(type));
}
auto split =
HiveConnectorSplitBuilder(filePath).partitionKey("pkey", value).build();
auto outputType =
ROW({"pkey", "c0", "c1"}, {partitionType, BIGINT(), DOUBLE()});
ColumnHandleMap assignments = {
Expand All @@ -166,8 +192,10 @@ class TableScanTest : public virtual HiveConnectorTestBase {
.endTableScan()
.planNode();

std::string partitionValueStr =
partitionValue.has_value() ? "'" + *partitionValue + "'" : "null";
std::string partitionValueStr = partitionValue.isNull()
? "null"
: makeCastSql(partitionValue, partitionType);

assertQuery(
op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr));

Expand Down Expand Up @@ -210,9 +238,10 @@ class TableScanTest : public virtual HiveConnectorTestBase {
void testPartitionedTable(
const std::string& filePath,
const TypePtr& partitionType,
const std::optional<std::string>& partitionValue) {
const variant& partitionValue) {
testPartitionedTableImpl(filePath, partitionType, partitionValue);
testPartitionedTableImpl(filePath, partitionType, std::nullopt);
testPartitionedTableImpl(
filePath, partitionType, variant::null(partitionType->kind()));
}

RowTypePtr rowType_{
Expand Down Expand Up @@ -1442,7 +1471,7 @@ TEST_F(TableScanTest, partitionedTableVarcharKey) {
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);

testPartitionedTable(filePath->path, VARCHAR(), "2020-11-01");
testPartitionedTable(filePath->path, VARCHAR(), variant("2020-11-01"));
}

TEST_F(TableScanTest, partitionedTableBigIntKey) {
Expand All @@ -1451,7 +1480,10 @@ TEST_F(TableScanTest, partitionedTableBigIntKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, BIGINT(), "123456789123456789");
testPartitionedTable(
filePath->path,
BIGINT(),
variant::create<TypeKind::BIGINT>(123456789123456789));
}

TEST_F(TableScanTest, partitionedTableIntegerKey) {
Expand All @@ -1460,7 +1492,8 @@ TEST_F(TableScanTest, partitionedTableIntegerKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, INTEGER(), "123456789");
testPartitionedTable(
filePath->path, INTEGER(), variant::create<TypeKind::INTEGER>(123456789));
}

TEST_F(TableScanTest, partitionedTableSmallIntKey) {
Expand All @@ -1469,7 +1502,8 @@ TEST_F(TableScanTest, partitionedTableSmallIntKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, SMALLINT(), "1");
testPartitionedTable(
filePath->path, SMALLINT(), variant::create<TypeKind::SMALLINT>(1));
}

TEST_F(TableScanTest, partitionedTableTinyIntKey) {
Expand All @@ -1478,7 +1512,8 @@ TEST_F(TableScanTest, partitionedTableTinyIntKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, TINYINT(), "1");
testPartitionedTable(
filePath->path, TINYINT(), variant::create<TypeKind::TINYINT>(1));
}

TEST_F(TableScanTest, partitionedTableBooleanKey) {
Expand All @@ -1487,7 +1522,8 @@ TEST_F(TableScanTest, partitionedTableBooleanKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, BOOLEAN(), "0");
testPartitionedTable(
filePath->path, BOOLEAN(), variant::create<TypeKind::BOOLEAN>(false));
}

TEST_F(TableScanTest, partitionedTableRealKey) {
Expand All @@ -1496,7 +1532,8 @@ TEST_F(TableScanTest, partitionedTableRealKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, REAL(), "3.5");
testPartitionedTable(
filePath->path, REAL(), variant::create<TypeKind::REAL>(3.5));
}

TEST_F(TableScanTest, partitionedTableDoubleKey) {
Expand All @@ -1505,7 +1542,35 @@ TEST_F(TableScanTest, partitionedTableDoubleKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, DOUBLE(), "3.5");
testPartitionedTable(
filePath->path, DOUBLE(), variant::create<TypeKind::DOUBLE>(3.5));
}

// Verifies table scan with DECIMAL partition keys. Covers positive and
// negative unscaled values supplied as BIGINT variants (DECIMAL(12, 3)) and as
// HUGEINT variants (DECIMAL(36, 18)).
TEST_F(TableScanTest, partitionedTableDecimalKey) {
  auto dataType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()});
  auto data = makeVectors(10, 1'000, dataType);
  auto file = TempFilePath::create();
  writeToFile(file->path, data);
  createDuckDbTable(data);

  // Partition keys whose unscaled value is carried in a BIGINT variant.
  const auto bigintBackedType = DECIMAL(12, 3);
  testPartitionedTable(
      file->path,
      bigintBackedType,
      variant::create<TypeKind::BIGINT>(123456789123));
  testPartitionedTable(
      file->path,
      bigintBackedType,
      variant::create<TypeKind::BIGINT>(-123456789123));

  // Partition keys whose unscaled value is carried in a HUGEINT variant.
  const auto hugeintBackedType = DECIMAL(36, 18);
  testPartitionedTable(
      file->path,
      hugeintBackedType,
      variant::create<TypeKind::HUGEINT>(
          HugeInt::parse("123456789123456789123456789123456789")));
  testPartitionedTable(
      file->path,
      hugeintBackedType,
      variant::create<TypeKind::HUGEINT>(
          HugeInt::parse("-123456789123456789123456789123456789")));
}

TEST_F(TableScanTest, partitionedTableDateKey) {
Expand All @@ -1514,7 +1579,8 @@ TEST_F(TableScanTest, partitionedTableDateKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->path, DATE(), "2023-10-27");
testPartitionedTable(
filePath->path, DATE(), variant::create<TypeKind::INTEGER>(19657));
}

std::vector<StringView> toStringViews(const std::vector<std::string>& values) {
Expand Down Expand Up @@ -3709,7 +3775,7 @@ TEST_F(TableScanTest, readMissingFieldsWithMoreColumns) {
}
}

TEST_F(TableScanTest, varbinaryPartitionKey) {
TEST_F(TableScanTest, partitionedTableVarbinaryKey) {
auto vectors = makeVectors(1, 1'000);
auto filePath = TempFilePath::create();
writeToFile(filePath->path, vectors);
Expand All @@ -3734,7 +3800,7 @@ TEST_F(TableScanTest, varbinaryPartitionKey) {
assertQuery(op, split, "SELECT c0, '2021-12-02' FROM tmp");
}

TEST_F(TableScanTest, timestampPartitionKey) {
TEST_F(TableScanTest, partitionedTableTimestampKey) {
const char* inputs[] = {"2023-10-14 07:00:00.0", "2024-01-06 04:00:00.0"};
auto expected = makeRowVector(
{"t"},
Expand Down

0 comments on commit 61b1431

Please sign in to comment.