Skip to content

Commit

Permalink
Merge 38313d5 into 0be743e
Browse files Browse the repository at this point in the history
  • Loading branch information
swalrus1 authored Oct 16, 2024
2 parents 0be743e + 38313d5 commit d98dfdd
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 24 deletions.
50 changes: 50 additions & 0 deletions ydb/library/binary_json/benchmark/write.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <benchmark/benchmark.h>

#include <util/random/random.h>
#include <library/cpp/testing/unittest/registar.h>
#include <library/cpp/json/json_value.h>
#include <library/cpp/json/json_writer.h>

#include <ydb/library/binary_json/write.h>

// Build-time switch. Example invocation that defines it:
//   ya test -r -D BENCHMARK_MAKE_LARGE_PART
// Falls back to 0 when the build does not define it.
// NOTE(review): nothing in this file reads the macro yet.
#ifndef BENCHMARK_MAKE_LARGE_PART
#define BENCHMARK_MAKE_LARGE_PART 0
#endif

using namespace NKikimr::NBinaryJson;

namespace {

// Seed passed to NUnitTest::RandomString; incremented after every generated
// string and reset to a fixed value by GetTestJsonString().
static ui64 seed = 0;

// Builds a synthetic JSON tree for the benchmark.
//
// depth     - number of levels; 1 (or 0) yields a single random 10-character
//             string leaf.
// nChildren - number of keyed children inserted at every non-leaf level.
//
// Every key and every leaf consumes one value of the file-level `seed`.
NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) {
    NJson::TJsonValue value;
    // `<= 1` rather than `== 1`: depth is unsigned, so depth == 0 would wrap
    // around on `depth - 1` and recurse until the stack is exhausted.
    if (depth <= 1) {
        value.SetValue(NUnitTest::RandomString(10, seed++));
        return value;
    }
    for (ui64 i = 0; i < nChildren; ++i) {
        // Draw the key before recursing: both operations advance `seed`, and
        // the evaluation order of function arguments is unspecified, so doing
        // both inside one call would make the data compiler-dependent.
        const auto key = NUnitTest::RandomString(10, seed++);
        // Forward nChildren so the requested fan-out applies at every level,
        // not only at the top one.
        value.InsertValue(key, GetTestJson(depth - 1, nChildren));
    }
    return value;
}

// Produces the benchmark input: the serialized text of GetTestJson(2, 100),
// generated from a fixed starting seed.
TString GetTestJsonString() {
    seed = 42;  // fixed starting point for the random strings
    const NJson::TJsonValue document = GetTestJson(2, 100);
    return NJson::WriteJson(document);
}

// Benchmark body: repeatedly runs SerializeToBinaryJson on one pre-generated
// JSON text.
static void BenchWriteSimdJson(benchmark::State& state) {
    // The input is generated once, outside the measured loop.
    const TString jsonText = GetTestJsonString();
    const TStringBuf input(jsonText);
    for (auto _ : state) {
        auto serialized = SerializeToBinaryJson(input);
        // Keep the result observable so the call cannot be optimized away.
        benchmark::DoNotOptimize(serialized);
        benchmark::ClobberMemory();
    }
}

}

// Registers BenchWriteSimdJson with the benchmark framework.
BENCHMARK(BenchWriteSimdJson);
25 changes: 25 additions & 0 deletions ydb/library/binary_json/benchmark/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Benchmark target for ydb/library/binary_json; the only source is write.cpp.
G_BENCHMARK()

TAG(ya:fat)
SIZE(LARGE)
TIMEOUT(600)

# Optional switch: passes -DBENCHMARK_MAKE_LARGE_PART=1 to the compiler and
# declares TIMEOUT(1200) (presumably overriding the 600 above - confirm).
IF (BENCHMARK_MAKE_LARGE_PART)
CFLAGS(
-DBENCHMARK_MAKE_LARGE_PART=1
)
TIMEOUT(1200)
ENDIF()

SRCS(
write.cpp
)

# library/cpp/testing/unittest is needed for NUnitTest::RandomString, which
# write.cpp uses to generate its input.
PEERDIR(
library/cpp/testing/unittest
ydb/library/binary_json
)

YQL_LAST_ABI_VERSION()

END()
116 changes: 92 additions & 24 deletions ydb/library/binary_json/write.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#include "write.h"

#include <contrib/libs/simdjson/include/simdjson/dom/array-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/document-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/element-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/object-inl.h>
#include <contrib/libs/simdjson/include/simdjson/dom/parser-inl.h>
#include <library/cpp/json/json_reader.h>

#include <util/generic/vector.h>
Expand Down Expand Up @@ -74,38 +79,29 @@ struct TJsonIndex {
// Returns the index assigned to `value` in the Keys table, assigning the next
// free string index the first time a key is seen.
//
// Every call increments TotalKeysCount; TotalKeyLength grows by
// (length + 1) only when a new key is actually stored.
ui32 InternKey(const TStringBuf value) {
    TotalKeysCount++;

    // Single lookup-or-insert instead of find() followed by operator[].
    const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex);
    if (emplaced) {
        // The index was consumed only if the key is new.
        ++LastFreeStringIndex;
        TotalKeyLength += value.length() + 1;
    }
    return it->second;
}

// Returns the index assigned to the string value `value` in the Strings
// table, assigning the next free string index the first time it is seen.
//
// TotalStringLength grows by (length + 1) only when a new string is stored.
ui32 InternString(const TStringBuf value) {
    // Must target Strings, not Keys: both tables draw indices from
    // LastFreeStringIndex, but string values inserted into Keys would never
    // show up in the Strings table that TotalStringLength accounts for.
    const auto [it, emplaced] = Strings.emplace(value, LastFreeStringIndex);
    if (emplaced) {
        // The index was consumed only if the string is new.
        ++LastFreeStringIndex;
        TotalStringLength += value.length() + 1;
    }
    return it->second;
}

// Returns the index assigned to `value` in the Numbers table, assigning the
// next free number index the first time the value is seen.
ui32 InternNumber(double value) {
    // Single lookup-or-insert instead of find() followed by operator[].
    const auto [it, emplaced] = Numbers.emplace(value, LastFreeNumberIndex);
    if (emplaced) {
        // The index was consumed only if the number is new.
        ++LastFreeNumberIndex;
    }
    return it->second;
}

void AddContainer(EContainerType type) {
Expand Down Expand Up @@ -551,17 +547,89 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call
}
}

// Walks a simdjson DOM element depth-first and replays it as events on
// `callbacks` (OnString, OnOpenArray, OnMapKey, ...).
//
// Returns simdjson::SUCCESS when the whole subtree was replayed, otherwise the
// first error reported by simdjson while extracting a value. On error the
// callbacks may already have received events for part of the subtree.
[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) {
// Evaluates `expr` exactly once. The result is bound to a local before the
// check so that a failing expression (in particular the recursive
// SimdJsonToJsonIndex call, which fires callbacks) is not executed a second
// time just to produce the return value.
#define RETURN_IF_NOT_SUCCESS(expr)                                   \
    do {                                                              \
        const auto simdJsonStatus_ = (expr);                          \
        if (Y_UNLIKELY(simdJsonStatus_ != simdjson::SUCCESS)) {       \
            return simdJsonStatus_;                                   \
        }                                                             \
    } while (false)

    switch (value.type()) {
        case simdjson::dom::element_type::STRING: {
            std::string_view v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnString(v);
            break;
        }
        case simdjson::dom::element_type::BOOL: {
            bool v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnBoolean(v);
            break;
        }
        case simdjson::dom::element_type::INT64: {
            i64 v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnInteger(v);
            break;
        }
        case simdjson::dom::element_type::UINT64: {
            ui64 v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnUInteger(v);
            break;
        }
        case simdjson::dom::element_type::DOUBLE: {
            double v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnDouble(v);
            break;
        }
        case simdjson::dom::element_type::NULL_VALUE:
            callbacks.OnNull();
            break;
        case simdjson::dom::element_type::ARRAY: {
            callbacks.OnOpenArray();

            simdjson::dom::array v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            for (const auto& item : v) {
                RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item, callbacks));
            }

            callbacks.OnCloseArray();
            break;
        }
        case simdjson::dom::element_type::OBJECT: {
            callbacks.OnOpenMap();

            simdjson::dom::object v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            for (const auto& item : v) {
                // The key event precedes the events of the member's value.
                callbacks.OnMapKey(item.key);
                RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value, callbacks));
            }

            callbacks.OnCloseMap();
            break;
        }
    }
    return simdjson::SUCCESS;
#undef RETURN_IF_NOT_SUCCESS
}
}

// Parses the JSON text `json` with simdjson's DOM parser and serializes the
// resulting document to binary JSON.
//
// Returns Nothing() when the text fails to parse or when walking the parsed
// document reports an error.
TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) {
    simdjson::dom::parser parser;
    auto doc = parser.parse(json);
    if (doc.error() != simdjson::SUCCESS) {
        return Nothing();
    }
    TBinaryJsonCallbacks callbacks(/* throwException */ false);
    // doc.value() is only reached after the error check above.
    if (SimdJsonToJsonIndex(doc.value(), callbacks) != simdjson::SUCCESS) {
        return Nothing();
    }
    TBinaryJsonSerializer serializer(std::move(callbacks).GetResult());
    return std::move(serializer).Serialize();
}

TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) {
Expand Down
2 changes: 2 additions & 0 deletions ydb/library/binary_json/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ YQL_ABI_VERSION(
# contrib/libs/simdjson provides the simdjson headers included by write.cpp.
PEERDIR(
library/cpp/json
ydb/library/yql/minikql/dom
contrib/libs/simdjson
)

SRCS(
Expand All @@ -23,4 +24,5 @@ END()

# Directories recursed into for tests; benchmark/ holds the G_BENCHMARK target.
RECURSE_FOR_TESTS(
ut
benchmark
)

0 comments on commit d98dfdd

Please sign in to comment.