From 478b6d84f77241f18da0607da4b4ad2b66ee8903 Mon Sep 17 00:00:00 2001 From: Semyon Yentsov Date: Tue, 15 Oct 2024 15:16:07 +0000 Subject: [PATCH 1/5] use simdjson in SerializeToBinaryJson --- ydb/library/binary_json/write.cpp | 76 +++++++++++++++++++++++++++++-- ydb/library/binary_json/ya.make | 1 + 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp index 88f6338797c0..36c242391c3e 100644 --- a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -1,5 +1,9 @@ #include "write.h" +#include +#include +#include +#include #include #include @@ -551,17 +555,81 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call } } +void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) { + switch (value.type()) { + case simdjson::dom::element_type::STRING: { + std::string_view v; + Y_ABORT_UNLESS(value.get(v)); + callbacks.OnString(v); + break; + } + case simdjson::dom::element_type::BOOL: { + bool v; + Y_ABORT_UNLESS(value.get(v)); + callbacks.OnBoolean(v); + break; + } + case simdjson::dom::element_type::INT64: { + i64 v; + Y_ABORT_UNLESS(value.get(v)); + callbacks.OnInteger(v); + break; + } + case simdjson::dom::element_type::UINT64: { + ui64 v; + Y_ABORT_UNLESS(value.get(v)); + callbacks.OnUInteger(v); + break; + } + case simdjson::dom::element_type::DOUBLE: { + double v; + Y_ABORT_UNLESS(value.get(v)); + callbacks.OnUInteger(v); + break; + } + case simdjson::dom::element_type::NULL_VALUE: + callbacks.OnNull(); + break; + case simdjson::dom::element_type::ARRAY: { + callbacks.OnOpenArray(); + + simdjson::dom::array v; + Y_ABORT_UNLESS(value.get(v)); + for (const auto& item : v) { + SimdJsonToJsonIndex(item, callbacks); + } + + callbacks.OnCloseArray(); + break; + } + case simdjson::dom::element_type::OBJECT: { + callbacks.OnOpenMap(); + + simdjson::dom::object v; + Y_ABORT_UNLESS(value.get(v)); + for (const 
auto& item : v) { + callbacks.OnMapKey(item.key); + SimdJsonToJsonIndex(item.value, callbacks); + } + + callbacks.OnCloseMap(); + break; + } + } +} + } TMaybe SerializeToBinaryJsonImpl(const TStringBuf json) { - TMemoryInput input(json.data(), json.size()); - TBinaryJsonCallbacks callbacks(/* throwException */ false); - if (!ReadJson(&input, &callbacks)) { + simdjson::dom::parser parser; + auto doc = parser.parse(json); + if (doc.error() != simdjson::SUCCESS) { return Nothing(); } + TBinaryJsonCallbacks callbacks(/* throwException */ false); + SimdJsonToJsonIndex(doc.value(), callbacks); TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); return std::move(serializer).Serialize(); - } TMaybe SerializeToBinaryJson(const TStringBuf json) { diff --git a/ydb/library/binary_json/ya.make b/ydb/library/binary_json/ya.make index 93b3032fd223..6a27bd058d2f 100644 --- a/ydb/library/binary_json/ya.make +++ b/ydb/library/binary_json/ya.make @@ -9,6 +9,7 @@ YQL_ABI_VERSION( PEERDIR( library/cpp/json ydb/library/yql/minikql/dom + contrib/libs/simdjson ) SRCS( From ae0b7e97854060c684770697af9e589fc43f1f4a Mon Sep 17 00:00:00 2001 From: Semyon Yentsov Date: Tue, 15 Oct 2024 16:09:10 +0000 Subject: [PATCH 2/5] fix build --- ydb/library/binary_json/write.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp index 36c242391c3e..118a36650c1a 100644 --- a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -1,9 +1,10 @@ #include "write.h" -#include -#include #include +#include +#include #include +#include #include #include From 846f023b06437c8eae7e25fd3608cb8cf359b0b3 Mon Sep 17 00:00:00 2001 From: Semyon Yentsov Date: Tue, 15 Oct 2024 17:10:13 +0000 Subject: [PATCH 3/5] fix verify --- ydb/library/binary_json/write.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ydb/library/binary_json/write.cpp 
b/ydb/library/binary_json/write.cpp index 118a36650c1a..887cdccc2a52 100644 --- a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -560,32 +560,32 @@ void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbac switch (value.type()) { case simdjson::dom::element_type::STRING: { std::string_view v; - Y_ABORT_UNLESS(value.get(v)); + Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS); callbacks.OnString(v); break; } case simdjson::dom::element_type::BOOL: { bool v; - Y_ABORT_UNLESS(value.get(v)); + Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS); callbacks.OnBoolean(v); break; } case simdjson::dom::element_type::INT64: { i64 v; - Y_ABORT_UNLESS(value.get(v)); + Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS); callbacks.OnInteger(v); break; } case simdjson::dom::element_type::UINT64: { ui64 v; - Y_ABORT_UNLESS(value.get(v)); + Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS); callbacks.OnUInteger(v); break; } case simdjson::dom::element_type::DOUBLE: { double v; - Y_ABORT_UNLESS(value.get(v)); - callbacks.OnUInteger(v); + Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS); + callbacks.OnDouble(v); break; } case simdjson::dom::element_type::NULL_VALUE: @@ -595,7 +595,7 @@ void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbac callbacks.OnOpenArray(); simdjson::dom::array v; - Y_ABORT_UNLESS(value.get(v)); + Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS); for (const auto& item : v) { SimdJsonToJsonIndex(item, callbacks); } @@ -607,7 +607,7 @@ void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbac callbacks.OnOpenMap(); simdjson::dom::object v; - Y_ABORT_UNLESS(value.get(v)); + Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS); for (const auto& item : v) { callbacks.OnMapKey(item.key); SimdJsonToJsonIndex(item.value, callbacks); From 5a9a5e2a5d690f75b6a54a67ef4dd561077a20c9 Mon Sep 17 00:00:00 2001 From: Semyon Yentsov Date: Wed, 16 Oct 2024 10:26:50 +0000 Subject: 
[PATCH 4/5] wip --- .../operations/metadata/tiering_rule/object.h | 37 +++++ ydb/library/binary_json/benchmark/read.cpp | 94 +++++++++++ ydb/library/binary_json/benchmark/ya.make | 30 ++++ ydb/library/binary_json/write.cpp | 153 +++++++++++++++--- ydb/library/binary_json/write.h | 2 + ydb/library/binary_json/ya.make | 1 + 6 files changed, 291 insertions(+), 26 deletions(-) create mode 100644 ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h create mode 100644 ydb/library/binary_json/benchmark/read.cpp create mode 100644 ydb/library/binary_json/benchmark/ya.make diff --git a/ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h b/ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h new file mode 100644 index 000000000000..5c573cd9ed7f --- /dev/null +++ b/ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NSchemeShard::NOperations { + +class TTieringRuleEntity: public TMetadataEntity { +private: + static TFactory::TRegistrator Registrator; + +private: + using TBase = TMetadataEntity; + YDB_READONLY_DEF(TTieringRuleInfo::TPtr, TieringRuleInfo); + + std::shared_ptr MakeDropUpdate() const override; + +protected: + [[nodiscard]] TConclusionStatus DoInitialize(const TEntityInitializationContext& context) override; + + TTieringRuleEntity(const TPathId& pathId, const TTieringRuleInfo::TPtr& objectInfo) + : TBase(pathId) + , TieringRuleInfo(objectInfo) { + } + +public: + TString GetClassName() const override { + return "TIERING_RULE"; + } + +public: + TTieringRuleEntity(const TPathId& pathId) + : TBase(pathId) { + } +}; +} \ No newline at end of file diff --git a/ydb/library/binary_json/benchmark/read.cpp b/ydb/library/binary_json/benchmark/read.cpp new file mode 100644 index 000000000000..8e9360a0ed2e --- /dev/null +++ b/ydb/library/binary_json/benchmark/read.cpp @@ -0,0 +1,94 @@ +#include + +#include +#include +#include +#include + 
+#include + +// ya test -r -D BENCHMARK_MAKE_LARGE_PART +#ifndef BENCHMARK_MAKE_LARGE_PART +#define BENCHMARK_MAKE_LARGE_PART 0 +#endif + +using namespace NKikimr::NBinaryJson; + +namespace { + +static ui64 seed = 0; + +NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) { + NJson::TJsonValue value; + if (depth == 1) { + value.SetValue(NUnitTest::RandomString(10, seed++)); + return value; + } + for (ui64 i = 0; i < nChildren; ++i) { + value.InsertValue(NUnitTest::RandomString(10, seed++), GetTestJson(depth - 1)); + } + return value; +} + +TString GetTestJsonString() { + seed = 42; + return NJson::WriteJson(GetTestJson(2, 100)); +} + +static void BenchRapidJson(benchmark::State& state) { + TString value = GetTestJsonString(); + for (auto _ : state) { + auto result = SerializeToBinaryJsonRapidjson(value); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } +} + +static void BenchSimdJsonDom(benchmark::State& state) { + TString value = GetTestJsonString(); + TStringBuf buf(value); + for (auto _ : state) { + auto result = SerializeToBinaryJson(buf); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } +} + +static void BenchSimdJsonOndemand(benchmark::State& state) { + TString value = GetTestJsonString(); + TStringBuf buf(value); + for (auto _ : state) { + auto result = SerializeToBinaryJsonOndemand(buf); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } +} + +static void BenchSimdJsonAll(benchmark::State& state) { + TString value = GetTestJsonString(); + TStringBuf buf(value); + for (auto _ : state) { + { + auto result = SerializeToBinaryJson(buf); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } + { + auto result = SerializeToBinaryJsonOndemand(buf); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } + { + auto result = SerializeToBinaryJsonRapidjson(value); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } + } +} + +} + 
+BENCHMARK(BenchRapidJson); +BENCHMARK(BenchSimdJsonDom); +BENCHMARK(BenchSimdJsonOndemand); +BENCHMARK(BenchSimdJsonAll); diff --git a/ydb/library/binary_json/benchmark/ya.make b/ydb/library/binary_json/benchmark/ya.make new file mode 100644 index 000000000000..4e24f95ef3b0 --- /dev/null +++ b/ydb/library/binary_json/benchmark/ya.make @@ -0,0 +1,30 @@ +G_BENCHMARK() + +TAG(ya:fat) +SIZE(LARGE) +TIMEOUT(600) + +IF (BENCHMARK_MAKE_LARGE_PART) + CFLAGS( + -DBENCHMARK_MAKE_LARGE_PART=1 + ) + TIMEOUT(1200) +ENDIF() + +SRCS( + read.cpp +) + +PEERDIR( + library/cpp/testing/unittest + ydb/library/binary_json + ydb/library/yql/minikql/dom + ydb/library/yql/minikql/invoke_builtins/llvm14 + ydb/library/yql/public/udf/service/exception_policy + ydb/library/yql/core/issue/protos + ydb/library/yql/sql/pg_dummy +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp index 887cdccc2a52..2af42b0042ac 100644 --- a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -3,15 +3,15 @@ #include #include #include -#include #include +#include +#include #include - -#include -#include -#include #include #include +#include +#include +#include #include @@ -79,38 +79,29 @@ struct TJsonIndex { ui32 InternKey(const TStringBuf value) { TotalKeysCount++; - const auto it = Keys.find(value); - if (it == Keys.end()) { - const ui32 currentIndex = LastFreeStringIndex++; - Keys[TString(value)] = currentIndex; + const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex); + if (emplaced) { + ++LastFreeStringIndex; TotalKeyLength += value.length() + 1; - return currentIndex; - } else { - return it->second; } + return it->second; } ui32 InternString(const TStringBuf value) { - const auto it = Strings.find(value); - if (it == Strings.end()) { - const ui32 currentIndex = LastFreeStringIndex++; - Strings[value] = currentIndex; + const auto [it, emplaced] = Strings.emplace(value, LastFreeStringIndex); + if (emplaced) 
{ + ++LastFreeStringIndex; TotalStringLength += value.length() + 1; - return currentIndex; - } else { - return it->second; } + return it->second; } ui32 InternNumber(double value) { - const auto it = Numbers.find(value); - if (it == Numbers.end()) { - const ui32 currentIndex = LastFreeNumberIndex++; - Numbers[value] = currentIndex; - return currentIndex; - } else { - return it->second; + const auto [it, emplaced] = Numbers.emplace(value, LastFreeNumberIndex); + if (emplaced) { + ++LastFreeNumberIndex; } + return it->second; } void AddContainer(EContainerType type) { @@ -619,6 +610,91 @@ void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbac } } +template + requires std::is_same_v || std::is_same_v +simdjson::error_code SimdJsonToJsonIndex(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) { +#define RETURN_IF_NOT_SUCCESS(error) \ + if (Y_UNLIKELY(error != simdjson::SUCCESS)) { \ + return error; \ + } + + switch (value.type()) { + case simdjson::ondemand::json_type::string: { + std::string_view v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnString(v); + break; + } + case simdjson::ondemand::json_type::boolean: { + bool v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnBoolean(v); + break; + } + case simdjson::ondemand::json_type::number: { + switch (value.get_number_type()) { + case simdjson::fallback::number_type::floating_point_number: { + double v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnDouble(v); + break; + } + case simdjson::fallback::number_type::signed_integer: { + i64 v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnInteger(v); + break; + } + case simdjson::fallback::number_type::unsigned_integer: { + ui64 v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnUInteger(v); + break; + } + case simdjson::fallback::number_type::big_integer: + return simdjson::NUMBER_OUT_OF_RANGE; + } + break; + } + case simdjson::ondemand::json_type::null: + callbacks.OnNull(); + break; + case 
simdjson::ondemand::json_type::array: { + callbacks.OnOpenArray(); + + simdjson::ondemand::array v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + for (auto item : v) { + RETURN_IF_NOT_SUCCESS(item.error()); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value_unsafe(), callbacks)); + } + + callbacks.OnCloseArray(); + break; + } + case simdjson::ondemand::json_type::object: { + callbacks.OnOpenMap(); + + simdjson::ondemand::object v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + for (auto item : v) { + RETURN_IF_NOT_SUCCESS(item.error()); + auto& keyValue = item.value_unsafe(); + const auto key = keyValue.unescaped_key(); + RETURN_IF_NOT_SUCCESS(key.error()); + callbacks.OnMapKey(key.value_unsafe()); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(keyValue.value(), callbacks)); + } + + callbacks.OnCloseMap(); + break; + } + } + + return simdjson::SUCCESS; + +#undef RETURN_IF_NOT_SUCCESS +} } TMaybe SerializeToBinaryJsonImpl(const TStringBuf json) { @@ -637,6 +713,31 @@ TMaybe SerializeToBinaryJson(const TStringBuf json) { return SerializeToBinaryJsonImpl(json); } +TMaybe SerializeToBinaryJsonOndemand(const TStringBuf json) { + const simdjson::padded_string paddedJson(json); + simdjson::ondemand::parser parser; + auto doc = parser.iterate(paddedJson); + if (doc.error() != simdjson::SUCCESS) { + return false; + } + TBinaryJsonCallbacks callbacks(/* throwException */ false); + if (SimdJsonToJsonIndex(doc.value_unsafe(), callbacks) != simdjson::SUCCESS) { + return false; + } + TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); + return std::move(serializer).Serialize(); +} + +TMaybe SerializeToBinaryJsonRapidjson(const TStringBuf json) { + TMemoryInput input(json.data(), json.size()); + TBinaryJsonCallbacks callbacks(/* throwException */ false); + if (!ReadJson(&input, &callbacks)) { + return Nothing(); + } + TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); + return std::move(serializer).Serialize(); +} + TBinaryJson SerializeToBinaryJson(const 
NUdf::TUnboxedValue& value) { TBinaryJsonCallbacks callbacks(/* throwException */ false); DomToJsonIndex(value, callbacks); diff --git a/ydb/library/binary_json/write.h b/ydb/library/binary_json/write.h index f1d4dad7cdc1..ec759f742688 100644 --- a/ydb/library/binary_json/write.h +++ b/ydb/library/binary_json/write.h @@ -12,6 +12,8 @@ namespace NKikimr::NBinaryJson { * @brief Translates textual JSON into BinaryJson */ TMaybe SerializeToBinaryJson(const TStringBuf json); +TMaybe SerializeToBinaryJsonOndemand(const TStringBuf json); +TMaybe SerializeToBinaryJsonRapidjson(const TStringBuf json); /** * @brief Translates DOM layout from `yql/library/dom` library into BinaryJson diff --git a/ydb/library/binary_json/ya.make b/ydb/library/binary_json/ya.make index 6a27bd058d2f..9ad18a3f5e53 100644 --- a/ydb/library/binary_json/ya.make +++ b/ydb/library/binary_json/ya.make @@ -24,4 +24,5 @@ END() RECURSE_FOR_TESTS( ut + benchmark ) From 50347d8da446e8b3f0b0ee4b1a32a3614acb6de2 Mon Sep 17 00:00:00 2001 From: Semyon Yentsov Date: Wed, 16 Oct 2024 10:30:33 +0000 Subject: [PATCH 5/5] leave only dom impl --- .../operations/metadata/tiering_rule/object.h | 37 ------ ydb/library/binary_json/benchmark/read.cpp | 94 --------------- ydb/library/binary_json/benchmark/write.cpp | 50 ++++++++ ydb/library/binary_json/benchmark/ya.make | 7 +- ydb/library/binary_json/write.cpp | 112 ------------------ 5 files changed, 51 insertions(+), 249 deletions(-) delete mode 100644 ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h delete mode 100644 ydb/library/binary_json/benchmark/read.cpp create mode 100644 ydb/library/binary_json/benchmark/write.cpp diff --git a/ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h b/ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h deleted file mode 100644 index 5c573cd9ed7f..000000000000 --- a/ydb/core/tx/schemeshard/operations/metadata/tiering_rule/object.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include 
-#include -#include - -namespace NKikimr::NSchemeShard::NOperations { - -class TTieringRuleEntity: public TMetadataEntity { -private: - static TFactory::TRegistrator Registrator; - -private: - using TBase = TMetadataEntity; - YDB_READONLY_DEF(TTieringRuleInfo::TPtr, TieringRuleInfo); - - std::shared_ptr MakeDropUpdate() const override; - -protected: - [[nodiscard]] TConclusionStatus DoInitialize(const TEntityInitializationContext& context) override; - - TTieringRuleEntity(const TPathId& pathId, const TTieringRuleInfo::TPtr& objectInfo) - : TBase(pathId) - , TieringRuleInfo(objectInfo) { - } - -public: - TString GetClassName() const override { - return "TIERING_RULE"; - } - -public: - TTieringRuleEntity(const TPathId& pathId) - : TBase(pathId) { - } -}; -} \ No newline at end of file diff --git a/ydb/library/binary_json/benchmark/read.cpp b/ydb/library/binary_json/benchmark/read.cpp deleted file mode 100644 index 8e9360a0ed2e..000000000000 --- a/ydb/library/binary_json/benchmark/read.cpp +++ /dev/null @@ -1,94 +0,0 @@ -#include - -#include -#include -#include -#include - -#include - -// ya test -r -D BENCHMARK_MAKE_LARGE_PART -#ifndef BENCHMARK_MAKE_LARGE_PART -#define BENCHMARK_MAKE_LARGE_PART 0 -#endif - -using namespace NKikimr::NBinaryJson; - -namespace { - -static ui64 seed = 0; - -NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) { - NJson::TJsonValue value; - if (depth == 1) { - value.SetValue(NUnitTest::RandomString(10, seed++)); - return value; - } - for (ui64 i = 0; i < nChildren; ++i) { - value.InsertValue(NUnitTest::RandomString(10, seed++), GetTestJson(depth - 1)); - } - return value; -} - -TString GetTestJsonString() { - seed = 42; - return NJson::WriteJson(GetTestJson(2, 100)); -} - -static void BenchRapidJson(benchmark::State& state) { - TString value = GetTestJsonString(); - for (auto _ : state) { - auto result = SerializeToBinaryJsonRapidjson(value); - benchmark::DoNotOptimize(result); - benchmark::ClobberMemory(); - } -} - -static 
void BenchSimdJsonDom(benchmark::State& state) { - TString value = GetTestJsonString(); - TStringBuf buf(value); - for (auto _ : state) { - auto result = SerializeToBinaryJson(buf); - benchmark::DoNotOptimize(result); - benchmark::ClobberMemory(); - } -} - -static void BenchSimdJsonOndemand(benchmark::State& state) { - TString value = GetTestJsonString(); - TStringBuf buf(value); - for (auto _ : state) { - auto result = SerializeToBinaryJsonOndemand(buf); - benchmark::DoNotOptimize(result); - benchmark::ClobberMemory(); - } -} - -static void BenchSimdJsonAll(benchmark::State& state) { - TString value = GetTestJsonString(); - TStringBuf buf(value); - for (auto _ : state) { - { - auto result = SerializeToBinaryJson(buf); - benchmark::DoNotOptimize(result); - benchmark::ClobberMemory(); - } - { - auto result = SerializeToBinaryJsonOndemand(buf); - benchmark::DoNotOptimize(result); - benchmark::ClobberMemory(); - } - { - auto result = SerializeToBinaryJsonRapidjson(value); - benchmark::DoNotOptimize(result); - benchmark::ClobberMemory(); - } - } -} - -} - -BENCHMARK(BenchRapidJson); -BENCHMARK(BenchSimdJsonDom); -BENCHMARK(BenchSimdJsonOndemand); -BENCHMARK(BenchSimdJsonAll); diff --git a/ydb/library/binary_json/benchmark/write.cpp b/ydb/library/binary_json/benchmark/write.cpp new file mode 100644 index 000000000000..c63257f10457 --- /dev/null +++ b/ydb/library/binary_json/benchmark/write.cpp @@ -0,0 +1,50 @@ +#include + +#include +#include +#include +#include + +#include + +// ya test -r -D BENCHMARK_MAKE_LARGE_PART +#ifndef BENCHMARK_MAKE_LARGE_PART +#define BENCHMARK_MAKE_LARGE_PART 0 +#endif + +using namespace NKikimr::NBinaryJson; + +namespace { + +static ui64 seed = 0; + +NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) { + NJson::TJsonValue value; + if (depth == 1) { + value.SetValue(NUnitTest::RandomString(10, seed++)); + return value; + } + for (ui64 i = 0; i < nChildren; ++i) { + value.InsertValue(NUnitTest::RandomString(10, seed++), 
GetTestJson(depth - 1)); + } + return value; +} + +TString GetTestJsonString() { + seed = 42; + return NJson::WriteJson(GetTestJson(2, 100)); +} + +static void BenchWriteSimdJson(benchmark::State& state) { + TString value = GetTestJsonString(); + TStringBuf buf(value); + for (auto _ : state) { + auto result = SerializeToBinaryJson(buf); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } +} + +} + +BENCHMARK(BenchWriteSimdJson); diff --git a/ydb/library/binary_json/benchmark/ya.make b/ydb/library/binary_json/benchmark/ya.make index 4e24f95ef3b0..efabf1020ccf 100644 --- a/ydb/library/binary_json/benchmark/ya.make +++ b/ydb/library/binary_json/benchmark/ya.make @@ -12,17 +12,12 @@ IF (BENCHMARK_MAKE_LARGE_PART) ENDIF() SRCS( - read.cpp + write.cpp ) PEERDIR( library/cpp/testing/unittest ydb/library/binary_json - ydb/library/yql/minikql/dom - ydb/library/yql/minikql/invoke_builtins/llvm14 - ydb/library/yql/public/udf/service/exception_policy - ydb/library/yql/core/issue/protos - ydb/library/yql/sql/pg_dummy ) YQL_LAST_ABI_VERSION() diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp index 2af42b0042ac..827912c7e09f 100644 --- a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -609,92 +608,6 @@ void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbac } } } - -template - requires std::is_same_v || std::is_same_v -simdjson::error_code SimdJsonToJsonIndex(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) { -#define RETURN_IF_NOT_SUCCESS(error) \ - if (Y_UNLIKELY(error != simdjson::SUCCESS)) { \ - return error; \ - } - - switch (value.type()) { - case simdjson::ondemand::json_type::string: { - std::string_view v; - RETURN_IF_NOT_SUCCESS(value.get(v)); - callbacks.OnString(v); - break; - } - case simdjson::ondemand::json_type::boolean: { - bool v; - RETURN_IF_NOT_SUCCESS(value.get(v)); - 
callbacks.OnBoolean(v); - break; - } - case simdjson::ondemand::json_type::number: { - switch (value.get_number_type()) { - case simdjson::fallback::number_type::floating_point_number: { - double v; - RETURN_IF_NOT_SUCCESS(value.get(v)); - callbacks.OnDouble(v); - break; - } - case simdjson::fallback::number_type::signed_integer: { - i64 v; - RETURN_IF_NOT_SUCCESS(value.get(v)); - callbacks.OnInteger(v); - break; - } - case simdjson::fallback::number_type::unsigned_integer: { - ui64 v; - RETURN_IF_NOT_SUCCESS(value.get(v)); - callbacks.OnUInteger(v); - break; - } - case simdjson::fallback::number_type::big_integer: - return simdjson::NUMBER_OUT_OF_RANGE; - } - break; - } - case simdjson::ondemand::json_type::null: - callbacks.OnNull(); - break; - case simdjson::ondemand::json_type::array: { - callbacks.OnOpenArray(); - - simdjson::ondemand::array v; - RETURN_IF_NOT_SUCCESS(value.get(v)); - for (auto item : v) { - RETURN_IF_NOT_SUCCESS(item.error()); - RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value_unsafe(), callbacks)); - } - - callbacks.OnCloseArray(); - break; - } - case simdjson::ondemand::json_type::object: { - callbacks.OnOpenMap(); - - simdjson::ondemand::object v; - RETURN_IF_NOT_SUCCESS(value.get(v)); - for (auto item : v) { - RETURN_IF_NOT_SUCCESS(item.error()); - auto& keyValue = item.value_unsafe(); - const auto key = keyValue.unescaped_key(); - RETURN_IF_NOT_SUCCESS(key.error()); - callbacks.OnMapKey(key.value_unsafe()); - RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(keyValue.value(), callbacks)); - } - - callbacks.OnCloseMap(); - break; - } - } - - return simdjson::SUCCESS; - -#undef RETURN_IF_NOT_SUCCESS -} } TMaybe SerializeToBinaryJsonImpl(const TStringBuf json) { @@ -713,31 +626,6 @@ TMaybe SerializeToBinaryJson(const TStringBuf json) { return SerializeToBinaryJsonImpl(json); } -TMaybe SerializeToBinaryJsonOndemand(const TStringBuf json) { - const simdjson::padded_string paddedJson(json); - simdjson::ondemand::parser parser; - auto doc = 
parser.iterate(paddedJson); - if (doc.error() != simdjson::SUCCESS) { - return false; - } - TBinaryJsonCallbacks callbacks(/* throwException */ false); - if (SimdJsonToJsonIndex(doc.value_unsafe(), callbacks) != simdjson::SUCCESS) { - return false; - } - TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); - return std::move(serializer).Serialize(); -} - -TMaybe SerializeToBinaryJsonRapidjson(const TStringBuf json) { - TMemoryInput input(json.data(), json.size()); - TBinaryJsonCallbacks callbacks(/* throwException */ false); - if (!ReadJson(&input, &callbacks)) { - return Nothing(); - } - TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); - return std::move(serializer).Serialize(); -} - TBinaryJson SerializeToBinaryJson(const NUdf::TUnboxedValue& value) { TBinaryJsonCallbacks callbacks(/* throwException */ false); DomToJsonIndex(value, callbacks);