Skip to content

Commit

Permalink
Merge 50347d8 into 030f818
Browse files Browse the repository at this point in the history
  • Loading branch information
swalrus1 authored Oct 16, 2024
2 parents 030f818 + 50347d8 commit 4fd7248
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 29 deletions.
50 changes: 50 additions & 0 deletions ydb/library/binary_json/benchmark/write.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <benchmark/benchmark.h>

#include <util/random/random.h>
#include <library/cpp/testing/unittest/registar.h>
#include <library/cpp/json/json_value.h>
#include <library/cpp/json/json_writer.h>

#include <ydb/library/binary_json/write.h>

// ya test -r -D BENCHMARK_MAKE_LARGE_PART
#ifndef BENCHMARK_MAKE_LARGE_PART
#define BENCHMARK_MAKE_LARGE_PART 0
#endif

using namespace NKikimr::NBinaryJson;

namespace {

static ui64 seed = 0;

// Builds a random JSON tree for the benchmark.
//
// depth     - number of levels; a depth of 1 (or 0) yields a single random
//             10-character string leaf.
// nChildren - number of keys in every map node, applied at EVERY level.
//
// Keys and leaves are produced by NUnitTest::RandomString seeded from the
// file-level `seed` counter, which is advanced on each use.
NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) {
    NJson::TJsonValue value;
    // `<= 1` rather than `== 1`: depth is unsigned, so a depth of 0 would wrap
    // around on `depth - 1` and recurse until the stack is exhausted.
    if (depth <= 1) {
        value.SetValue(NUnitTest::RandomString(10, seed++));
        return value;
    }
    for (ui64 i = 0; i < nChildren; ++i) {
        // Forward nChildren so nested levels keep the requested fan-out
        // instead of silently falling back to the default.
        value.InsertValue(NUnitTest::RandomString(10, seed++), GetTestJson(depth - 1, nChildren));
    }
    return value;
}

// Produces the textual JSON document used as benchmark input: a two-level
// tree with 100 keys, generated from a fixed seed.
TString GetTestJsonString() {
    // Reset the shared counter so each call starts from the same seed value.
    seed = 42;
    const NJson::TJsonValue root = GetTestJson(2, 100);
    return NJson::WriteJson(root);
}

// Benchmark body: repeatedly serializes one pre-generated JSON text into
// BinaryJson. The input is built once, outside the measured loop.
static void BenchSimdJson(benchmark::State& state) {
    const TString jsonText = GetTestJsonString();
    const TStringBuf jsonView(jsonText);
    for (auto _ : state) {
        auto serialized = SerializeToBinaryJson(jsonView);
        // Prevent the optimizer from discarding the serialization result.
        benchmark::DoNotOptimize(serialized);
        benchmark::ClobberMemory();
    }
}

}

// Register the benchmark defined above; the argument must match the name of
// the function in the anonymous namespace (BenchSimdJson).
BENCHMARK(BenchSimdJson);
25 changes: 25 additions & 0 deletions ydb/library/binary_json/benchmark/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Benchmark target for the BinaryJson writer (source: write.cpp).
G_BENCHMARK()

TAG(ya:fat)
SIZE(LARGE)
TIMEOUT(600)

# When BENCHMARK_MAKE_LARGE_PART is set for the build, pass it to the compiler
# as a preprocessor definition and raise the timeout from 600 to 1200.
IF (BENCHMARK_MAKE_LARGE_PART)
CFLAGS(
-DBENCHMARK_MAKE_LARGE_PART=1
)
TIMEOUT(1200)
ENDIF()

SRCS(
write.cpp
)

# write.cpp uses NUnitTest::RandomString and SerializeToBinaryJson from these
# libraries.
PEERDIR(
library/cpp/testing/unittest
ydb/library/binary_json
)

YQL_LAST_ABI_VERSION()

END()
116 changes: 87 additions & 29 deletions ydb/library/binary_json/write.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
#include "write.h"

#include <contrib/libs/simdjson/include/simdjson/dom/array-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/document-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/element-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/object-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/parser-inl.h>
#include <library/cpp/json/json_reader.h>

#include <util/generic/vector.h>
#include <util/generic/stack.h>
#include <util/generic/set.h>
#include <util/generic/algorithm.h>
#include <util/generic/map.h>
#include <util/generic/set.h>
#include <util/generic/stack.h>
#include <util/generic/vector.h>

#include <cmath>

Expand Down Expand Up @@ -74,38 +78,29 @@ struct TJsonIndex {
// Interns an object key and returns its index.
//
// Every call is counted in TotalKeysCount, including repeated keys. A key seen
// for the first time takes the next free string index and adds its length
// plus one to TotalKeyLength; a repeated key returns the index assigned
// earlier.
ui32 InternKey(const TStringBuf value) {
    TotalKeysCount++;

    // A single emplace performs both lookup and insert: it leaves the map
    // untouched (and returns the existing entry) when the key is already there.
    const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex);
    if (emplaced) {
        ++LastFreeStringIndex;
        TotalKeyLength += value.length() + 1;
    }
    return it->second;
}

// Interns a string value and returns its index.
//
// A string seen for the first time takes the next free string index (the
// counter is shared with InternKey) and adds its length plus one to
// TotalStringLength; a repeated string returns the index assigned earlier.
ui32 InternString(const TStringBuf value) {
    // String values live in Strings, not Keys: inserting into Keys would mix
    // the two tables and hand back a key's index for an equal string value
    // without accounting for it in TotalStringLength.
    const auto [it, emplaced] = Strings.emplace(value, LastFreeStringIndex);
    if (emplaced) {
        ++LastFreeStringIndex;
        TotalStringLength += value.length() + 1;
    }
    return it->second;
}

// Interns a numeric value and returns its index.
//
// A number seen for the first time takes the next free number index; a
// repeated number returns the index assigned earlier.
ui32 InternNumber(double value) {
    // emplace only inserts when the value is absent, so one call covers both
    // the lookup and the insertion.
    const auto [it, emplaced] = Numbers.emplace(value, LastFreeNumberIndex);
    if (emplaced) {
        ++LastFreeNumberIndex;
    }
    return it->second;
}

void AddContainer(EContainerType type) {
Expand Down Expand Up @@ -551,17 +546,80 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call
}
}

// Walks a parsed simdjson DOM element recursively and replays it as a sequence
// of callback events (scalars, array open/close, map open/key/close).
//
// Each typed extraction is checked with Y_ABORT_UNLESS: the element type has
// already been selected by the switch, so a failed get() is treated as fatal.
void SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) {
    switch (value.type()) {
        case simdjson::dom::element_type::NULL_VALUE:
            callbacks.OnNull();
            break;
        case simdjson::dom::element_type::BOOL: {
            bool flag;
            Y_ABORT_UNLESS(value.get(flag) == simdjson::SUCCESS);
            callbacks.OnBoolean(flag);
            break;
        }
        case simdjson::dom::element_type::INT64: {
            i64 signedNumber;
            Y_ABORT_UNLESS(value.get(signedNumber) == simdjson::SUCCESS);
            callbacks.OnInteger(signedNumber);
            break;
        }
        case simdjson::dom::element_type::UINT64: {
            ui64 unsignedNumber;
            Y_ABORT_UNLESS(value.get(unsignedNumber) == simdjson::SUCCESS);
            callbacks.OnUInteger(unsignedNumber);
            break;
        }
        case simdjson::dom::element_type::DOUBLE: {
            double realNumber;
            Y_ABORT_UNLESS(value.get(realNumber) == simdjson::SUCCESS);
            callbacks.OnDouble(realNumber);
            break;
        }
        case simdjson::dom::element_type::STRING: {
            std::string_view text;
            Y_ABORT_UNLESS(value.get(text) == simdjson::SUCCESS);
            callbacks.OnString(text);
            break;
        }
        case simdjson::dom::element_type::ARRAY: {
            // The open event is emitted before the array view is extracted.
            callbacks.OnOpenArray();

            simdjson::dom::array elements;
            Y_ABORT_UNLESS(value.get(elements) == simdjson::SUCCESS);
            for (const auto& element : elements) {
                SimdJsonToJsonIndex(element, callbacks);
            }

            callbacks.OnCloseArray();
            break;
        }
        case simdjson::dom::element_type::OBJECT: {
            // The open event is emitted before the object view is extracted.
            callbacks.OnOpenMap();

            simdjson::dom::object fields;
            Y_ABORT_UNLESS(value.get(fields) == simdjson::SUCCESS);
            for (const auto& field : fields) {
                callbacks.OnMapKey(field.key);
                SimdJsonToJsonIndex(field.value, callbacks);
            }

            callbacks.OnCloseMap();
            break;
        }
    }
}
}

// Parses textual JSON with the simdjson DOM parser and serializes the result
// into BinaryJson.
//
// Returns Nothing() when simdjson reports a parse error; otherwise returns the
// serialized document.
TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) {
    simdjson::dom::parser parser;
    auto doc = parser.parse(json);
    if (doc.error() != simdjson::SUCCESS) {
        return Nothing();
    }

    // The callbacks object is created only after a successful parse and is
    // then fed by walking the DOM.
    TBinaryJsonCallbacks callbacks(/* throwException */ false);
    SimdJsonToJsonIndex(doc.value(), callbacks);

    TBinaryJsonSerializer serializer(std::move(callbacks).GetResult());
    return std::move(serializer).Serialize();
}

TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) {
Expand Down
2 changes: 2 additions & 0 deletions ydb/library/binary_json/write.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ namespace NKikimr::NBinaryJson {
* @brief Translates textual JSON into BinaryJson
*/
TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json);
TMaybe<TBinaryJson> SerializeToBinaryJsonOndemand(const TStringBuf json);
TMaybe<TBinaryJson> SerializeToBinaryJsonRapidjson(const TStringBuf json);

/**
* @brief Translates DOM layout from `yql/library/dom` library into BinaryJson
Expand Down
2 changes: 2 additions & 0 deletions ydb/library/binary_json/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ YQL_ABI_VERSION(
PEERDIR(
library/cpp/json
ydb/library/yql/minikql/dom
contrib/libs/simdjson
)

SRCS(
Expand All @@ -23,4 +24,5 @@ END()

RECURSE_FOR_TESTS(
ut
benchmark
)

0 comments on commit 4fd7248

Please sign in to comment.