Commit: 1.1.0
bigerl authored Sep 9, 2021
1 parent a897a05 commit ee1e0a6
Showing 4 changed files with 244 additions and 4 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13)
project(tentris
LANGUAGES CXX
VERSION 1.0.7)
VERSION 1.1.0)
set(CMAKE_CXX_STANDARD 20)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/version.hpp.in ${CMAKE_CURRENT_SOURCE_DIR}/src/lib/tentris/tentris_version.hpp)
2 changes: 1 addition & 1 deletion conanfile.txt
@@ -2,7 +2,7 @@
boost/1.75.0
fmt/7.1.3
restinio/0.6.12
hypertrie/0.5.3@dice-group/stable
hypertrie/0.6.0@dice-group/stable
rdf-parser/0.13.0@dice-group/stable
sparql-parser-base/0.2.2@dice-group/stable

242 changes: 241 additions & 1 deletion src/exec/tools/IDs2Hypertrie.cpp
@@ -12,6 +12,13 @@
#include <tentris/util/LogHelper.hpp>

namespace tentris::IDs2Hypertrie {
void writeNodeStatsTSVs(const auto &storage_3_uncompressed, const auto &storage_2_uncompressed,
const auto &storage_2_compressed, const auto &storage_1_uncompressed,
const auto &storage_1_compressed);

void writeNodeCountComparisonTSVs(const auto &storage_2_uncompressed, const auto &storage_2_compressed,
const auto &storage_1_uncompressed, const auto &storage_1_compressed);

void loadIDsAndWriteOutStats(const std::string &csv_file_path);
}
int main(int argc, char *argv[]) {
@@ -81,6 +88,24 @@ namespace tentris::IDs2Hypertrie {
auto end = steady_clock::now();
auto duration = end - start;

constexpr static auto uncompressed = hypertrie::internal::raw::NodeCompression::uncompressed;
constexpr static auto compressed = hypertrie::internal::raw::NodeCompression::compressed;

auto &storage = trie.context()->rawContext().storage;

const auto &storage_3_uncompressed = storage.getNodeStorage<3UL, uncompressed>();
const auto &storage_2_uncompressed = storage.getNodeStorage<2UL, uncompressed>();
const auto &storage_2_compressed = storage.getNodeStorage<2UL, compressed>();
const auto &storage_1_uncompressed = storage.getNodeStorage<1UL, uncompressed>();
const auto &storage_1_compressed = storage.getNodeStorage<1UL, compressed>();

writeNodeStatsTSVs(storage_3_uncompressed, storage_2_uncompressed, storage_2_compressed, storage_1_uncompressed,
storage_1_compressed);

writeNodeCountComparisonTSVs(storage_2_uncompressed, storage_2_compressed, storage_1_uncompressed,
storage_1_compressed);


std::cerr << "## total ## \n"
<< "triples processed: {}\n"_format(count)
<< "triples loaded: {}\n"_format(trie.size())
@@ -90,7 +115,222 @@
(std::chrono::duration_cast<std::chrono::minutes>(duration) % 60).count(),
(std::chrono::duration_cast<std::chrono::seconds>(duration) % 60).count(),
(std::chrono::duration_cast<std::chrono::milliseconds>(duration) % 1000).count(),
std::chrono::duration_cast<std::chrono::milliseconds>(duration).count());
std::chrono::duration_cast<std::chrono::milliseconds>(duration).count())
<< "# hypertrie stats #\n"
<< "depth 3 uncompressed nodes: {}\n"_format(storage_3_uncompressed.size())
<< "depth 2 uncompressed nodes: {}\n"_format(storage.getNodeStorage<2UL, uncompressed>().size())
<< "depth 2 compressed nodes: {}\n"_format(storage.getNodeStorage<2UL, compressed>().size())
<< "depth 1 uncompressed nodes: {}\n"_format(storage.getNodeStorage<1UL, uncompressed>().size())
<< "depth 1 compressed nodes: {}\n"_format(storage.getNodeStorage<1UL, compressed>().size());
}

void writeNodeCountComparisonTSVs(const auto &storage_2_uncompressed, const auto &storage_2_compressed,
const auto &storage_1_uncompressed, const auto &storage_1_compressed) {
{
std::ofstream tsv_depth_2_comp("depth_2_node_count_comparision.tsv");
auto csv_writer = csv::make_tsv_writer(tsv_depth_2_comp);

csv_writer << std::make_tuple("hypertrie_type", "uncompressed_nodes", "compressed_nodes");

{ // baseline
size_t uc_nodes = [&]() {
size_t old_uc = 0;
for (auto[hash, node] : storage_2_uncompressed)
old_uc += node->ref_count();
for (auto[hash, node] : storage_2_compressed)
old_uc += node->ref_count();
return old_uc;
}();

size_t c_nodes = 0;

csv_writer << std::make_tuple("baseline", uc_nodes, c_nodes);
}

{ // compression
size_t uc_nodes = [&]() {
size_t old_uc = 0;
for (auto[hash, node] : storage_2_uncompressed)
old_uc += node->ref_count();
return old_uc;
}();

size_t c_nodes = [&]() {
size_t c_nodes = 0;
for (auto[hash, node] : storage_2_compressed)
c_nodes += node->ref_count();
return c_nodes;
}();

csv_writer << std::make_tuple("compression", uc_nodes, c_nodes);
}

{ // hash
size_t uc_nodes = storage_2_uncompressed.size() + storage_2_compressed.size();

size_t c_nodes = 0;

csv_writer << std::make_tuple("hash", uc_nodes, c_nodes);
}

{ // hash+compression and hash+compression+inline
size_t uc_nodes = storage_2_uncompressed.size();

size_t c_nodes = storage_2_compressed.size();

csv_writer << std::make_tuple("hash+compression", uc_nodes, c_nodes);
csv_writer << std::make_tuple("hash+compression+inline", uc_nodes, c_nodes);
}
}

{
std::ofstream tsv_depth_1_comp("depth_1_node_count_comparision.tsv");
auto csv_writer = csv::make_tsv_writer(tsv_depth_1_comp);

csv_writer << std::make_tuple("hypertrie_type", "uncompressed_nodes", "compressed_nodes");

{ // baseline
size_t c_depth2_nodes = [&]() {
size_t depth2nodes = 0;
for (auto[hash, node] : storage_2_uncompressed)
depth2nodes += node->ref_count();
return depth2nodes;
}();

size_t depth1_nodes = [&]() {
size_t uc_nodes = 0;
for (auto[hash, node] : storage_1_uncompressed)
uc_nodes += node->ref_count();
for (auto[hash, node] : storage_1_compressed)
uc_nodes += node->ref_count();
return uc_nodes;
}();

size_t uc_nodes = c_depth2_nodes + (depth1_nodes / 2);

size_t c_nodes = 0;

csv_writer << std::make_tuple("baseline", uc_nodes, c_nodes);
}

{ // compression
size_t uc_nodes = [&]() {
size_t uc_nodes = 0;
for (auto[hash, node] : storage_1_uncompressed)
uc_nodes += node->ref_count();
return uc_nodes;
}() / 2;

size_t c_nodes = [&]() {
size_t x = 0;
for (auto[hash, node] : storage_1_compressed)
x += node->ref_count();
return x;
}();
c_nodes = c_nodes / 2;

csv_writer << std::make_tuple("compression", uc_nodes, c_nodes);
}

{ // hash
using TensorHash = hypertrie::internal::raw::TensorHash;

size_t compressed_nodes_count = [&]() {
robin_hood::unordered_set<TensorHash> c_d1_hashes;

// add the hashes from depth 1 compressed nodes.
for (auto[hash, node] : storage_1_compressed)
c_d1_hashes.insert(hash);

// break apart the depth-2 compressed nodes and add a hash for each of the two key parts
for (auto[hash, node] : storage_2_compressed) {
c_d1_hashes.insert(
TensorHash::getCompressedNodeHash<1, key_part_type>(
{node->key()[0]}, true));
c_d1_hashes.insert(
TensorHash::getCompressedNodeHash<1, key_part_type>(
{node->key()[1]}, true));
}

return c_d1_hashes.size();
}();

size_t uc_nodes = compressed_nodes_count + storage_1_uncompressed.size();

size_t c_nodes = 0;

csv_writer << std::make_tuple("hash", uc_nodes, c_nodes);
}

{ // hash+compression and hash+compression+inline
size_t uc_nodes = storage_1_uncompressed.size();

size_t c_nodes = storage_1_compressed.size();

csv_writer << std::make_tuple("hash+compression", uc_nodes, c_nodes);
csv_writer << std::make_tuple("hash+compression+inline", uc_nodes, 0);
}

}
}

void writeNodeStatsTSVs(const auto &storage_3_uncompressed, const auto &storage_2_uncompressed,
const auto &storage_2_compressed, const auto &storage_1_uncompressed,
const auto &storage_1_compressed) {
auto extactCompressionTag = [](const auto &hash) { return (hash.isCompressed()) ? "c" : "u"; };

{
std::ofstream tsv_depth_3("depth_3_nodes_stats.tsv"); // Can also use ofstream, etc.
auto csv_writer = csv::make_tsv_writer(tsv_depth_3);


csv_writer
<< std::make_tuple("node_type", "node_size", "dimension_1_size", "dimension_2_size",
"dimension_3_size",
"reference_count");
for (auto[hash, node] : storage_3_uncompressed) {
csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), node->edges(0).size(),
node->edges(1).size(),
node->edges(2).size(),
node->ref_count());
}
}

{
std::ofstream tsv_depth_2("depth_2_nodes_stats.tsv"); // Can also use ofstream, etc.
auto csv_writer = csv::make_tsv_writer(tsv_depth_2);

csv_writer
<< std::make_tuple("node_type", "node_size", "dimension_1_size", "dimension_2_size",
"reference_count");

for (auto[hash, node] : storage_2_compressed) {
csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), 1, 1, node->ref_count());
}

for (auto[hash, node] : storage_2_uncompressed) {
csv_writer
<< std::make_tuple(extactCompressionTag(hash), node->size(), node->edges(0).size(),
node->edges(1).size(), node->ref_count());
}
}

{
std::ofstream tsv_depth_1("depth_1_nodes_stats.tsv"); // Can also use ofstream, etc.
auto csv_writer = csv::make_tsv_writer(tsv_depth_1);

csv_writer << std::make_tuple("node_type", "node_size", "dimension_1_size", "reference_count");

for (auto[hash, node] : storage_1_compressed) {
csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), 1, node->ref_count());
}

for (auto[hash, node] : storage_1_uncompressed) {
csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), node->edges(0).size(),
node->ref_count());
}
}
}


}
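
Note: the comparison TSVs above are built from two counting modes that recur throughout the new tool. Summing ref_count() over a hash-to-node storage gives the number of node occurrences a design without hash-based node sharing would have to materialize, while the storage's size() gives the number of nodes actually stored once identical nodes are shared. The following is a minimal, self-contained sketch of that pattern; the Node struct and the plain std::unordered_map are illustrative stand-ins for hypertrie's raw node storage, not its actual API.

#include <cstddef>
#include <iostream>
#include <unordered_map>

// Illustrative stand-in for a hypertrie node; only the reference count matters here.
struct Node {
    std::size_t refs = 0;
    std::size_t ref_count() const { return refs; }
};

int main() {
    // Hash -> node storage, modelled as a plain map instead of hypertrie's NodeStorage.
    std::unordered_map<std::size_t, Node> storage{
            {0xa1, Node{3}}, // one stored node referenced by three parents
            {0xb2, Node{1}},
            {0xc3, Node{2}}};

    // "baseline"-style count: without hash-based node sharing, every reference
    // would have to be materialized as its own node.
    std::size_t logical_nodes = 0;
    for (const auto &[hash, node] : storage)
        logical_nodes += node.ref_count();

    // "hash"-style count: each distinct node is stored exactly once.
    std::size_t stored_nodes = storage.size();

    std::cout << "logical nodes: " << logical_nodes << "\n"  // prints 6
              << "stored nodes:  " << stored_nodes << "\n";  // prints 3
}
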
2 changes: 1 addition & 1 deletion src/lib/tentris/store/TripleStore.hpp
@@ -71,7 +71,7 @@ namespace tentris::store {
"Subject or predicate of the triple have a term type that is not allowed there."};
++count;

if (trie.size() % bulk_size == 0) {
if (bulk_inserter.size() == bulk_size) {
bulk_inserter.flush();
logDebug("{:>10.3} mio triples processed."_format(double(count)/1'000'000));
logDebug("{:>10.3} mio triples loaded."_format(double(trie.size())/1'000'000));
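
Note: the TripleStore.hpp change moves the flush trigger from the trie's global size to the bulk inserter's own pending count, presumably because trie.size() only grows for triples that are not already stored (and, with a buffering inserter, only after a flush), so it does not track how many triples have been added and the old modulo check could fail to fire. Checking the inserter's buffer directly fires deterministically after every bulk_size insertions. Below is a small sketch of the fixed logic with toy stand-ins for the trie and the bulk inserter; these are illustrative only, not hypertrie's actual BulkInserter API.

#include <array>
#include <cstddef>
#include <iostream>
#include <set>
#include <vector>

using Triple = std::array<int, 3>;

struct ToyTrie {
    std::set<Triple> entries; // deduplicates triples, like the hypertrie
    std::size_t size() const { return entries.size(); }
};

struct ToyBulkInserter {
    ToyTrie &trie;
    std::vector<Triple> buffer;
    std::size_t size() const { return buffer.size(); }
    void add(const Triple &t) { buffer.push_back(t); }
    void flush() {
        for (const auto &t : buffer)
            trie.entries.insert(t);
        buffer.clear();
    }
};

int main() {
    constexpr std::size_t bulk_size = 2;
    ToyTrie trie;
    ToyBulkInserter bulk_inserter{trie};

    // Input containing a duplicate: only three distinct triples end up in the trie.
    const std::vector<Triple> input{{1, 2, 3}, {1, 2, 3}, {4, 5, 6}, {7, 8, 9}};

    for (const auto &t : input) {
        bulk_inserter.add(t);
        // Old trigger: trie.size() % bulk_size == 0 depends on how many
        // distinct triples have already been flushed and can be skipped.
        // New trigger: the buffer itself reaching bulk_size is deterministic.
        if (bulk_inserter.size() == bulk_size)
            bulk_inserter.flush();
    }
    bulk_inserter.flush(); // flush the remainder

    std::cout << "triples loaded: " << trie.size() << "\n"; // prints 3
}
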
