Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use simdjson for binary json construction for improved performance #10464

Merged
merged 27 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/config/muted_ya.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ ydb/core/tx/schemeshard/ut_move_reboots TSchemeShardMoveRebootsTest.WithDataAndP
ydb/core/tx/schemeshard/ut_pq_reboots TPqGroupTestReboots.AlterWithReboots-PQConfigTransactionsAtSchemeShard-false
ydb/core/tx/schemeshard/ut_restore TImportTests.ShouldSucceedOnManyTables
ydb/core/tx/schemeshard/ut_split_merge TSchemeShardSplitBySizeTest.Merge1KShards
ydb/core/tx/tiering/ut ColumnShardTiers.TTLUsage
ydb/core/tx/tx_proxy/ut_ext_tenant TExtSubDomainTest.CreateTableInsideAndAlterDomainAndTable-AlterDatabaseCreateHiveFirst*
ydb/core/tx/tx_proxy/ut_storage_tenant TStorageTenantTest.RemoveStoragePoolBeforeDroppingTablet
ydb/core/util/ut TCircularOperationQueueTest.ShouldShuffle
Expand Down
1 change: 1 addition & 0 deletions ydb/core/formats/arrow/accessor/abstract/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ PEERDIR(
contrib/libs/apache/arrow
ydb/library/conclusion
ydb/services/metadata/abstract
ydb/library/actors/core
ydb/library/formats/arrow/accessor/abstract
ydb/library/formats/arrow/accessor/common
ydb/library/formats/arrow/protos
Expand Down
9 changes: 5 additions & 4 deletions ydb/core/formats/arrow/converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ static bool ConvertData(TCell& cell, const NScheme::TTypeInfo& colType, TMemoryP
}
case NScheme::NTypeIds::JsonDocument: {
const auto binaryJson = NBinaryJson::SerializeToBinaryJson(cell.AsBuf());
if (!binaryJson.Defined()) {
errorMessage = "Invalid JSON for JsonDocument provided";
if (binaryJson.IsFail()) {
errorMessage = "Invalid JSON for JsonDocument provided: " + binaryJson.GetErrorMessage();
return false;
}
const auto saved = memPool.AppendString(TStringBuf(binaryJson->Data(), binaryJson->Size()));
Expand Down Expand Up @@ -98,8 +98,9 @@ static arrow::Status ConvertColumn(const NScheme::TTypeInfo colType, std::shared
}
} else {
const auto binaryJson = NBinaryJson::SerializeToBinaryJson(valueBuf);
if (!binaryJson.Defined()) {
return arrow::Status::SerializationError("Cannot serialize json: ", valueBuf);
if (binaryJson.IsFail()) {
return arrow::Status::SerializationError(
"Cannot serialize json (", binaryJson.GetErrorMessage(), "): ", valueBuf.SubStr(0, Min(valueBuf.Size(), 1024ul)));
}
auto appendResult = builder.Append(binaryJson->Data(), binaryJson->Size());
if (!appendResult.ok()) {
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/formats/arrow/ut/ut_arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ struct TDataRow {
std::vector<TCell> cells(value.Cells().data(), value.Cells().data() + value.Cells().size());

auto binaryJson = NBinaryJson::SerializeToBinaryJson(TStringBuf(JsonDocument.data(), JsonDocument.size()));
UNIT_ASSERT(binaryJson.Defined());
UNIT_ASSERT(binaryJson.IsSuccess());

cells[19] = TCell(binaryJson->Data(), binaryJson->Size());
return TOwnedCellVec(cells);
Expand Down
13 changes: 9 additions & 4 deletions ydb/core/io_formats/cell_maker/cell_maker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,13 @@ namespace {
return false;
}

result = NBinaryJson::SerializeToBinaryJson(unescaped);
return result.Defined();
auto serializedJson = NBinaryJson::SerializeToBinaryJson(unescaped);
if (serializedJson.IsFail()) {
return false;
}

result = serializedJson.DetachResult();
return true;
}

template <>
Expand Down Expand Up @@ -395,8 +400,8 @@ bool MakeCell(TCell& cell, const NJson::TJsonValue& value, const NScheme::TTypeI
case NScheme::NTypeIds::Json:
return TCellMaker<TString, TStringBuf>::MakeDirect(cell, NFormats::WriteJson(value), pool, err);
case NScheme::NTypeIds::JsonDocument:
if (const auto& result = NBinaryJson::SerializeToBinaryJson(NFormats::WriteJson(value))) {
return TCellMaker<TMaybe<NBinaryJson::TBinaryJson>, TStringBuf>::MakeDirect(cell, result, pool, err, &BinaryJsonToStringBuf);
if (auto result = NBinaryJson::SerializeToBinaryJson(NFormats::WriteJson(value)); result.IsSuccess()) {
return TCellMaker<TMaybe<NBinaryJson::TBinaryJson>, TStringBuf>::MakeDirect(cell, result.DetachResult(), pool, err, &BinaryJsonToStringBuf);
} else {
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8914,7 +8914,7 @@ Y_UNIT_TEST_SUITE(KqpOlapTypes) {
testHelper.CreateTable(testTable);
std::string jsonString = R"({"col1": "val1", "obj": {"obj_col2_int": 16}})";
auto maybeJsonDoc = NBinaryJson::SerializeToBinaryJson(jsonString);
Y_ABORT_UNLESS(maybeJsonDoc.Defined());
Y_ABORT_UNLESS(maybeJsonDoc.IsSuccess());
const std::string jsonBin(maybeJsonDoc->Data(), maybeJsonDoc->Size());
{
TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ PEERDIR(
contrib/libs/apache/arrow
ydb/library/conclusion
ydb/core/scheme_types
ydb/library/actors/core
)

END()
1 change: 1 addition & 0 deletions ydb/core/tx/data_events/common/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ LIBRARY()

PEERDIR(
ydb/core/protos
ydb/library/conclusion
ydb/library/yql/core/issue/protos
ydb/public/api/protos
)
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,7 +1196,7 @@ value {
const TString string = "test string";
const TString json = R"({"key": "value"})";
auto binaryJson = NBinaryJson::SerializeToBinaryJson(json);
Y_ABORT_UNLESS(binaryJson.Defined());
Y_ABORT_UNLESS(binaryJson.IsSuccess());

const std::pair<ui64, ui64> decimal = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("16.17", NScheme::DECIMAL_PRECISION, NScheme::DECIMAL_SCALE));
const std::pair<ui64, ui64> decimal35 = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("555555555555555.123456789", 35, 10));
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/ydb_convert/ydb_convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ Y_FORCE_INLINE void ConvertData(NUdf::TDataTypeId typeId, const Ydb::Value& valu
case NUdf::TDataType<NUdf::TJsonDocument>::Id: {
CheckTypeId(value.value_case(), Ydb::Value::kTextValue, "JsonDocument");
const auto binaryJson = NBinaryJson::SerializeToBinaryJson(value.text_value());
if (!binaryJson.Defined()) {
if (binaryJson.IsFail()) {
throw yexception() << "Invalid JsonDocument value";
}
res.SetBytes(binaryJson->Data(), binaryJson->Size());
Expand Down Expand Up @@ -1238,7 +1238,7 @@ bool CellFromProtoVal(const NScheme::TTypeInfo& type, i32 typmod, const Ydb::Val
}
case NScheme::NTypeIds::JsonDocument : {
const auto binaryJson = NBinaryJson::SerializeToBinaryJson(val.Gettext_value());
if (!binaryJson.Defined()) {
if (binaryJson.IsFail()) {
err = "Invalid JSON for JsonDocument provided";
return false;
}
Expand Down
50 changes: 50 additions & 0 deletions ydb/library/binary_json/ut_benchmark/write.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <benchmark/benchmark.h>

#include <util/random/random.h>
#include <library/cpp/testing/unittest/registar.h>
#include <library/cpp/json/json_value.h>
#include <library/cpp/json/json_writer.h>

#include <ydb/library/binary_json/write.h>

// Build-time switch for this benchmark. It can be supplied on the command line:
//     ya test -r -D BENCHMARK_MAKE_LARGE_PART
// When the build does not define the macro, it defaults to 0 here.
// NOTE(review): nothing in the visible part of this file reads the macro - confirm it is still needed.
#ifndef BENCHMARK_MAKE_LARGE_PART
#define BENCHMARK_MAKE_LARGE_PART 0
#endif

using namespace NKikimr::NBinaryJson;

namespace {

// Seed passed to NUnitTest::RandomString while the test document is built.
// GetTestJson post-increments it for every generated string, and GetTestJsonString
// resets it to 42 before generating.
static ui64 seed = 0;

// Builds a random JSON tree used as benchmark input.
//
// depth     - number of levels; 1 (or 0) yields a single random string leaf.
// nChildren - number of key/value entries inserted at every non-leaf level.
//
// Every key and every leaf is a 10-character NUnitTest::RandomString seeded from the
// file-level `seed`, which is advanced once per generated string.
NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) {
    NJson::TJsonValue value;
    // `depth` is unsigned: treating 0 like 1 keeps `depth - 1` below from wrapping around.
    if (depth <= 1) {
        value.SetValue(NUnitTest::RandomString(10, seed++));
        return value;
    }
    for (ui64 i = 0; i < nChildren; ++i) {
        // Generate the key before recursing. Both steps advance `seed`, and the order in
        // which function arguments are evaluated is unspecified, so doing both inside one
        // call expression would make the generated document compiler-dependent.
        const TString key = NUnitTest::RandomString(10, seed++);
        // Forward nChildren so the requested fan-out applies to every level, not only the top one.
        value.InsertValue(key, GetTestJson(depth - 1, nChildren));
    }
    return value;
}

// Produces the JSON text used as benchmark input: resets the generator seed to 42,
// builds a tree with GetTestJson(3, 50) and serializes it with NJson::WriteJson.
TString GetTestJsonString() {
    seed = 42;
    const NJson::TJsonValue document = GetTestJson(3, 50);
    return NJson::WriteJson(document);
}

// Benchmark body: converts one pre-generated JSON text with SerializeToBinaryJson on
// every iteration. The input text is generated once, before the measured loop starts.
static void BenchWriteSimdJson(benchmark::State& state) {
    TString json = GetTestJsonString();
    TStringBuf input(json);
    for (auto _ : state) {
        auto serialized = SerializeToBinaryJson(input);
        // Keep the result observable so the call is not optimized away.
        benchmark::DoNotOptimize(serialized);
        benchmark::ClobberMemory();
    }
}

}

// Registers the benchmark with Google Benchmark; MinTime(1) sets the minimum measuring time (seconds).
BENCHMARK(BenchWriteSimdJson)->MinTime(1);
30 changes: 30 additions & 0 deletions ydb/library/binary_json/ut_benchmark/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Build description of the binary_json write benchmark (single source: write.cpp).
G_BENCHMARK()

TAG(ya:fat)
SIZE(LARGE)
TIMEOUT(600)

# Optional mode: when BENCHMARK_MAKE_LARGE_PART is set for the build, define the macro
# of the same name as 1 for the compiler and raise the timeout from 600 to 1200.
IF (BENCHMARK_MAKE_LARGE_PART)
CFLAGS(
-DBENCHMARK_MAKE_LARGE_PART=1
)
TIMEOUT(1200)
ENDIF()

SRCS(
write.cpp
)

# NOTE(review): write.cpp directly includes only the unittest, json and binary_json headers;
# presumably the yql peers below are needed to link binary_json - confirm before trimming.
PEERDIR(
library/cpp/testing/unittest
ydb/library/binary_json
ydb/library/yql/minikql/dom
ydb/library/yql/minikql/invoke_builtins/llvm14
ydb/library/yql/public/udf/service/exception_policy
ydb/library/yql/core/issue/protos
ydb/library/yql/sql/pg_dummy
)

YQL_LAST_ABI_VERSION()

END()
Loading
Loading