diff --git a/ydb/library/binary_json/benchmark/write.cpp b/ydb/library/binary_json/benchmark/write.cpp
new file mode 100644
index 000000000000..c63257f10457
--- /dev/null
+++ b/ydb/library/binary_json/benchmark/write.cpp
@@ -0,0 +1,58 @@
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+// ya test -r -D BENCHMARK_MAKE_LARGE_PART
+#ifndef BENCHMARK_MAKE_LARGE_PART
+#define BENCHMARK_MAKE_LARGE_PART 0
+#endif
+
+using namespace NKikimr::NBinaryJson;
+
+namespace {
+
+// Seed passed to NUnitTest::RandomString; incremented after every generated string.
+static ui64 seed = 0;
+
+// Builds a random JSON value: a string when depth <= 1, otherwise nChildren
+// randomly-keyed entries, each holding a subtree of depth (depth - 1).
+NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) {
+    NJson::TJsonValue value;
+    // `<=` rather than `==`: depth is unsigned, so depth 0 must not reach `depth - 1`.
+    if (depth <= 1) {
+        value.SetValue(NUnitTest::RandomString(10, seed++));
+        return value;
+    }
+    for (ui64 i = 0; i < nChildren; ++i) {
+        // Generate the key first so seed usage does not depend on argument evaluation order.
+        const auto key = NUnitTest::RandomString(10, seed++);
+        value.InsertValue(key, GetTestJson(depth - 1, nChildren));
+    }
+    return value;
+}
+
+// Serializes a two-level, 100-entry test document to JSON text, restarting from seed 42.
+TString GetTestJsonString() {
+    seed = 42;
+    return NJson::WriteJson(GetTestJson(2, 100));
+}
+
+// Benchmarks SerializeToBinaryJson; the input text is built once, outside the timed loop.
+static void BenchWriteSimdJson(benchmark::State& state) {
+    TString value = GetTestJsonString();
+    TStringBuf buf(value);
+    for (auto _ : state) {
+        auto result = SerializeToBinaryJson(buf);
+        benchmark::DoNotOptimize(result);
+        benchmark::ClobberMemory();
+    }
+}
+
+}
+
+BENCHMARK(BenchWriteSimdJson);
diff --git a/ydb/library/binary_json/benchmark/ya.make b/ydb/library/binary_json/benchmark/ya.make
new file mode 100644
index 000000000000..efabf1020ccf
--- /dev/null
+++ b/ydb/library/binary_json/benchmark/ya.make
@@ -0,0 +1,25 @@
+G_BENCHMARK()
+
+TAG(ya:fat)
+SIZE(LARGE)
+TIMEOUT(600)
+
+IF (BENCHMARK_MAKE_LARGE_PART)
+    CFLAGS(
+        -DBENCHMARK_MAKE_LARGE_PART=1
+    )
+    TIMEOUT(1200)
+ENDIF()
+
+SRCS(
+    write.cpp
+)
+
+PEERDIR(
+    library/cpp/testing/unittest
+    ydb/library/binary_json
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp
index 88f6338797c0..827912c7e09f 100644
--- a/ydb/library/binary_json/write.cpp
+++ b/ydb/library/binary_json/write.cpp
@@ -1,12 +1,16 @@
#include "write.h"

+#include
+#include
+#include
+#include
+#include
 #include
-
-#include
-#include
-#include
 #include
 #include
+#include
+#include
+#include

 #include

@@ -74,38 +78,29 @@ struct TJsonIndex {
     ui32 InternKey(const TStringBuf value) {
         TotalKeysCount++;

-        const auto it = Keys.find(value);
-        if (it == Keys.end()) {
-            const ui32 currentIndex = LastFreeStringIndex++;
-            Keys[TString(value)] = currentIndex;
+        const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex);
+        if (emplaced) {
+            ++LastFreeStringIndex;
             TotalKeyLength += value.length() + 1;
-            return currentIndex;
-        } else {
-            return it->second;
         }
+        return it->second;
     }

     ui32 InternString(const TStringBuf value) {
-        const auto it = Strings.find(value);
-        if (it == Strings.end()) {
-            const ui32 currentIndex = LastFreeStringIndex++;
-            Strings[value] = currentIndex;
+        const auto [it, emplaced] = Strings.emplace(value, LastFreeStringIndex);
+        if (emplaced) {
+            ++LastFreeStringIndex;
             TotalStringLength += value.length() + 1;
-            return currentIndex;
-        } else {
-            return it->second;
         }
+        return it->second;
     }

     ui32 InternNumber(double value) {
-        const auto it = Numbers.find(value);
-        if (it == Numbers.end()) {
-            const ui32 currentIndex = LastFreeNumberIndex++;
-            Numbers[value] = currentIndex;
-            return currentIndex;
-        } else {
-            return it->second;
+        const auto [it, emplaced] = Numbers.emplace(value, LastFreeNumberIndex);
+        if (emplaced) {
+            ++LastFreeNumberIndex;
         }
+        return it->second;
     }

     void AddContainer(EContainerType type) {
@@ -551,17 +546,82 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call
     }
 }

+// Recursively replays a parsed simdjson DOM element into `callbacks`: one On* call per
+// scalar, OnOpen*/OnClose* around arrays and maps, and OnMapKey before each map value.
+void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) {
+    switch (value.type()) {
+        case simdjson::dom::element_type::STRING: {
+            std::string_view v;
+            Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS);
+            callbacks.OnString(v);
+            break;
+        }
+        case simdjson::dom::element_type::BOOL: {
+            bool v;
+            Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS);
+            callbacks.OnBoolean(v);
+            break;
+        }
+        case simdjson::dom::element_type::INT64: {
+            i64 v;
+            Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS);
+            callbacks.OnInteger(v);
+            break;
+        }
+        case simdjson::dom::element_type::UINT64: {
+            ui64 v;
+            Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS);
+            callbacks.OnUInteger(v);
+            break;
+        }
+        case simdjson::dom::element_type::DOUBLE: {
+            double v;
+            Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS);
+            callbacks.OnDouble(v);
+            break;
+        }
+        case simdjson::dom::element_type::NULL_VALUE:
+            callbacks.OnNull();
+            break;
+        case simdjson::dom::element_type::ARRAY: {
+            callbacks.OnOpenArray();
+
+            simdjson::dom::array v;
+            Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS);
+            for (const auto& item : v) {
+                SimdJsonToJsonIndex(item, callbacks);
+            }
+
+            callbacks.OnCloseArray();
+            break;
+        }
+        case simdjson::dom::element_type::OBJECT: {
+            callbacks.OnOpenMap();
+
+            simdjson::dom::object v;
+            Y_ABORT_UNLESS(value.get(v) == simdjson::SUCCESS);
+            for (const auto& item : v) {
+                callbacks.OnMapKey(item.key);
+                SimdJsonToJsonIndex(item.value, callbacks);
+            }
+
+            callbacks.OnCloseMap();
+            break;
+        }
+    }
+}
 }

 TMaybe SerializeToBinaryJsonImpl(const TStringBuf json) {
-    TMemoryInput input(json.data(), json.size());
-    TBinaryJsonCallbacks callbacks(/* throwException */ false);
-    if (!ReadJson(&input, &callbacks)) {
+    simdjson::dom::parser parser;
+    auto doc = parser.parse(json);
+    if (doc.error() != simdjson::SUCCESS) {
         return Nothing();
     }
+    TBinaryJsonCallbacks callbacks(/* throwException */ false);
+    SimdJsonToJsonIndex(doc.value(), callbacks);
     TBinaryJsonSerializer serializer(std::move(callbacks).GetResult());
     return std::move(serializer).Serialize();
-
 }

 TMaybe SerializeToBinaryJson(const TStringBuf json) {
diff --git a/ydb/library/binary_json/write.h b/ydb/library/binary_json/write.h
index f1d4dad7cdc1..ec759f742688 100644
--- a/ydb/library/binary_json/write.h
+++ b/ydb/library/binary_json/write.h
@@ -12,6 +12,8 @@
namespace NKikimr::NBinaryJson {
  * @brief Translates textual JSON into BinaryJson
  */
 TMaybe SerializeToBinaryJson(const TStringBuf json);
+TMaybe SerializeToBinaryJsonOndemand(const TStringBuf json);
+TMaybe SerializeToBinaryJsonRapidjson(const TStringBuf json);

 /**
  * @brief Translates DOM layout from `yql/library/dom` library into BinaryJson
diff --git a/ydb/library/binary_json/ya.make b/ydb/library/binary_json/ya.make
index 93b3032fd223..9ad18a3f5e53 100644
--- a/ydb/library/binary_json/ya.make
+++ b/ydb/library/binary_json/ya.make
@@ -9,6 +9,7 @@ YQL_ABI_VERSION(
 PEERDIR(
     library/cpp/json
     ydb/library/yql/minikql/dom
+    contrib/libs/simdjson
 )

 SRCS(
@@ -23,4 +24,5 @@ END()

 RECURSE_FOR_TESTS(
     ut
+    benchmark
 )