Skip to content

Commit

Permalink
stats: add symbol table for future stat name encoding (envoyproxy#3927)
Browse files Browse the repository at this point in the history
Add symbol table implementation, to be used for encoding heap-allocated stat names in the future. The symbol table grows and shrinks dynamically with the number of unique string segments in it, and guarantees x = decode(encode(x)) for as long as at least one instance of each string segment in x exists. Implicitly splits on period-delimited stat names. Does not enforce stat name length or string segment length.

Risk Level: Low, adds library but does not add dependencies on it anywhere.
Testing: See //test/common/stats:symbol_table_test.
Docs Changes: None
Release Notes: None

Signed-off-by: James Buckland <jbuckland@google.com>
  • Loading branch information
ambuc authored and htuch committed Aug 28, 2018
1 parent c987b42 commit 53f8944
Show file tree
Hide file tree
Showing 7 changed files with 489 additions and 0 deletions.
5 changes: 5 additions & 0 deletions include/envoy/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ envoy_cc_library(
deps = ["//include/envoy/common:interval_set_interface"],
)

# Header-only target exposing the symbol table interface header (symbol_table.h).
envoy_cc_library(
name = "symbol_table_interface",
hdrs = ["symbol_table.h"],
)

envoy_cc_library(
name = "timespan",
hdrs = ["timespan.h"],
Expand Down
50 changes: 50 additions & 0 deletions include/envoy/stats/symbol_table.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once

#include <memory>
#include <string>
#include <vector>

#include "envoy/common/pure.h"

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Stats {

/**
 * Abstract interface for an object that stores a stat name and can render it as a string.
 */
class StatName {
public:
  virtual ~StatName() = default;

  /**
   * @return std::string the stored stat name, rendered as a string.
   */
  virtual std::string toString() const PURE;
};

// Owning handle to a StatName; this is the type returned by SymbolTable::encode().
using StatNamePtr = std::unique_ptr<StatName>;

/**
 * Interface for a table that shortens stat names into an encoded form and retrieves them again.
 *
 * Guarantees that x == encode(x)->toString() for any x.
 */
class SymbolTable {
public:
  virtual ~SymbolTable() = default;

  /**
   * Encodes a stat name into a StatNamePtr.
   *
   * @param name the stat name to encode; expected to be period-delimited.
   * @return StatNamePtr a unique_ptr owning the StatName that encapsulates the encoded name.
   */
  virtual StatNamePtr encode(absl::string_view name) PURE;

  /**
   * @return size_t the size of the table, measured in number of symbols stored.
   */
  virtual size_t size() const PURE;
};

} // namespace Stats
} // namespace Envoy
13 changes: 13 additions & 0 deletions source/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ envoy_cc_library(
":raw_stat_data_lib",
":source_impl_lib",
":stats_options_lib",
":symbol_table_lib",
":tag_extractor_lib",
":utility_lib",
"//include/envoy/common:time_interface",
Expand All @@ -117,6 +118,18 @@ envoy_cc_library(
],
)

# Symbol table implementation library (symbol_table_impl.{h,cc}); builds against the
# symbol_table_interface target plus the common assert and utility libraries.
envoy_cc_library(
name = "symbol_table_lib",
srcs = ["symbol_table_impl.cc"],
hdrs = ["symbol_table_impl.h"],
external_deps = ["abseil_base"],
deps = [
"//include/envoy/stats:symbol_table_interface",
"//source/common/common:assert_lib",
"//source/common/common:utility_lib",
],
)

envoy_cc_library(
name = "stats_options_lib",
hdrs = ["stats_options_impl.h"],
Expand Down
86 changes: 86 additions & 0 deletions source/common/stats/symbol_table_impl.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include "common/stats/symbol_table_impl.h"

#include <memory>
#include <unordered_map>
#include <vector>

#include "common/common/assert.h"

namespace Envoy {
namespace Stats {

// TODO(ambuc): There is a possible performance optimization here for avoiding the encoding of IPs,
// if they appear in stat names. We don't want to waste time symbolizing an integer as an integer,
// if we can help it.
/**
 * Splits `name` on '.' and converts every segment to a symbol via toSymbol(), which creates the
 * symbol or bumps its reference count.
 *
 * @param name the period-delimited stat name to encode.
 * @return StatNamePtr a StatNameImpl that owns the resulting symbols and releases them back to
 *         this table when it is destroyed.
 */
StatNamePtr SymbolTableImpl::encode(const absl::string_view name) {
  // One symbol per period-delimited segment, in order.
  const std::vector<absl::string_view> segments = absl::StrSplit(name, '.');

  SymbolVec symbol_vec;
  symbol_vec.reserve(segments.size());
  for (const absl::string_view segment : segments) {
    symbol_vec.push_back(toSymbol(segment));
  }

  // The vector is not needed here any more, so hand it over instead of copying it.
  return std::make_unique<StatNameImpl>(std::move(symbol_vec), *this);
}

std::string SymbolTableImpl::decode(const SymbolVec& symbol_vec) const {
std::vector<absl::string_view> name;
name.reserve(symbol_vec.size());
std::transform(symbol_vec.begin(), symbol_vec.end(), std::back_inserter(name),
[this](Symbol x) { return fromSymbol(x); });
return absl::StrJoin(name, ".");
}

// Releases one reference on every symbol in symbol_vec. A symbol whose reference count drops to
// zero is removed from both maps and returned to the reuse pool.
void SymbolTableImpl::free(const SymbolVec& symbol_vec) {
  for (const Symbol symbol : symbol_vec) {
    auto decode_search = decode_map_.find(symbol);
    ASSERT(decode_search != decode_map_.end());

    auto encode_search = encode_map_.find(decode_search->second);
    ASSERT(encode_search != encode_map_.end());

    encode_search->second.ref_count_--;
    // If that was the last remaining client usage of the symbol, erase the current mappings and
    // add the now-unused symbol to the reuse pool.
    if (encode_search->second.ref_count_ == 0) {
      // The key of the encode_map_ entry is a string_view into the std::string owned by the
      // decode_map_ entry. Erase the view first: an unordered_map may re-hash the key while
      // erasing by iterator, which must not happen after the backing string is destroyed.
      encode_map_.erase(encode_search);
      decode_map_.erase(decode_search);
      pool_.push(symbol);
    }
  }
}

// Returns the symbol for a single string segment, creating a new mapping if the segment has not
// been seen, and otherwise adding one reference to the existing mapping.
Symbol SymbolTableImpl::toSymbol(absl::string_view sv) {
  auto existing = encode_map_.find(sv);
  if (existing != encode_map_.end()) {
    // The segment is already in the table: up its refcount and hand back its symbol.
    ++(existing->second.ref_count_);
    return existing->second.symbol_;
  }

  // The segment is new. The actual string is created once and stored in decode_map_; the
  // encode_map_ key is a string_view pointing at that stored string, so the characters are only
  // kept in one place.
  auto decode_insert = decode_map_.insert({next_symbol_, std::string(sv)});
  ASSERT(decode_insert.second);

  auto encode_insert = encode_map_.insert(
      {decode_insert.first->second, {.symbol_ = next_symbol_, .ref_count_ = 1}});
  ASSERT(encode_insert.second);

  // Remember which symbol was just consumed before staging the next one.
  const Symbol assigned = next_symbol_;
  newSymbol();
  return assigned;
}

// Looks up the string segment for one symbol. The returned view refers to the string held in
// decode_map_, so it is only usable while that entry remains in the table.
absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const {
  const auto entry = decode_map_.find(symbol);
  ASSERT(entry != decode_map_.end());
  return entry->second;
}

} // namespace Stats
} // namespace Envoy
139 changes: 139 additions & 0 deletions source/common/stats/symbol_table_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#pragma once

#include <algorithm>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "envoy/common/exception.h"
#include "envoy/stats/symbol_table.h"

#include "common/common/assert.h"
#include "common/common/utility.h"

#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"

namespace Envoy {
namespace Stats {

// A Symbol is the integer code that stands in for one period-delimited segment of a stat name.
using Symbol = uint32_t;
// A SymbolVec holds the symbols of a stat name's segments, in segment order.
using SymbolVec = std::vector<Symbol>;

/**
 * SymbolTable implementation which manages per-symbol reference counting.
 *
 * Symbol and SymbolVec are meant to stay internal to the implementation. Because freed symbols
 * are recycled through a pool, the symbol counter is not monotonically increasing: if a string
 * is encoded, the resulting stat is destroyed, and the same string is then re-encoded, it may or
 * may not map to the same underlying symbol.
 */
class SymbolTableImpl : public SymbolTable {
public:
  StatNamePtr encode(absl::string_view name) override;

  // For testing purposes only. Both halves of the bimap are expected to hold the same number of
  // entries.
  size_t size() const override {
    const size_t num_symbols = encode_map_.size();
    ASSERT(num_symbols == decode_map_.size());
    return num_symbols;
  }

private:
  friend class StatNameImpl;
  friend class StatNameTest;

  // Value stored per string segment in encode_map_: the segment's symbol and the number of
  // outstanding references to it.
  struct SharedSymbol {
    Symbol symbol_;
    uint32_t ref_count_;
  };

  /**
   * Decodes a vector of symbols back into its period-delimited stat name.
   * Every symbol in symbol_vec is expected to be present in the table; fromSymbol() ASSERTs on
   * each lookup, since a missing symbol should never happen.
   *
   * @param symbol_vec the vector of symbols to decode.
   * @return std::string the retrieved stat name.
   */
  std::string decode(const SymbolVec& symbol_vec) const;

  /**
   * Releases the references held by symbol_vec. Reference counting here is manual, so a client
   * of the table (such as StatName) must call free(symbol_vec) itself when the stat it
   * represents goes away. This lets the table shrink as well as grow, instead of being
   * write-only.
   *
   * @param symbol_vec the vector of symbols to be freed.
   */
  void free(const SymbolVec& symbol_vec);

  /**
   * Helper for encode() that symbolizes a single string segment.
   *
   * @param sv the individual string to be encoded as a symbol.
   * @return Symbol the encoded string.
   */
  Symbol toSymbol(absl::string_view sv);

  /**
   * Helper for decode() that resolves a single symbol.
   *
   * @param symbol the individual symbol to be decoded.
   * @return absl::string_view the decoded string.
   */
  absl::string_view fromSymbol(Symbol symbol) const;

  // Stages the symbol for the next insertion; call after a successful insertion. Recycled
  // symbols from the pool are preferred, and the counter is only advanced when the pool is empty.
  void newSymbol() {
    if (!pool_.empty()) {
      next_symbol_ = pool_.top();
      pool_.pop();
    } else {
      next_symbol_ = ++monotonic_counter_;
    }
    // This should catch integer overflow for the new symbol.
    ASSERT(monotonic_counter_ != 0);
  }

  Symbol monotonicCounter() { return monotonic_counter_; }

  // The symbol that the next insertion will use. It is chosen ahead of insertion time so that a
  // successful insertion writes the correct value.
  Symbol next_symbol_ = 0;

  // Advanced only when the free pool has nothing to offer.
  Symbol monotonic_counter_ = 0;

  // Bimap implementation.
  // encode_map_ maps a string segment to its symbol and ref count; decode_map_ maps a symbol back
  // to its string. Keying encode_map_ on absl::string_view means the complete string is stored
  // only once, in decode_map_.
  std::unordered_map<absl::string_view, SharedSymbol, StringViewHash> encode_map_;
  std::unordered_map<Symbol, std::string> decode_map_;

  // Free pool of symbols for re-use.
  // TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols
  // using an Envoy::IntervalSet.
  std::stack<Symbol> pool_;
};

/**
 * Implements RAII for Symbols: the StatNameImpl destructor does the work of freeing its
 * component symbols back to the SymbolTableImpl that produced them.
 */
class StatNameImpl : public StatName {
public:
  /**
   * @param symbol_vec the symbols making up this stat name; taken by value and moved into place.
   * @param symbol_table the table the symbols belong to. Only a reference is stored, and it is
   *        used again in toString() and in the destructor.
   */
  StatNameImpl(SymbolVec symbol_vec, SymbolTableImpl& symbol_table)
      : symbol_vec_(std::move(symbol_vec)), symbol_table_(symbol_table) {}
  ~StatNameImpl() override { symbol_table_.free(symbol_vec_); }

  // Each instance frees its symbols exactly once on destruction. A copy would free the same
  // symbols a second time without having taken extra references, so copying is disallowed.
  StatNameImpl(const StatNameImpl&) = delete;
  StatNameImpl& operator=(const StatNameImpl&) = delete;

  std::string toString() const override { return symbol_table_.decode(symbol_vec_); }

private:
  friend class StatNameTest;
  // Returns a copy of the underlying symbols; reachable only through the StatNameTest friend.
  SymbolVec symbolVec() const { return symbol_vec_; }
  SymbolVec symbol_vec_;
  SymbolTableImpl& symbol_table_;
};

} // namespace Stats
} // namespace Envoy
12 changes: 12 additions & 0 deletions test/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,22 @@ envoy_cc_test(
],
)

# Test target for symbol_table_lib, built from symbol_table_test.cc.
envoy_cc_test(
name = "symbol_table_test",
srcs = ["symbol_table_test.cc"],
deps = [
"//source/common/stats:symbol_table_lib",
"//test/mocks/stats:stats_mocks",
"//test/test_common:logging_lib",
"//test/test_common:utility_lib",
],
)

envoy_cc_test(
name = "thread_local_store_test",
srcs = ["thread_local_store_test.cc"],
deps = [
"//source/common/stats:symbol_table_lib",
"//source/common/stats:thread_local_store_lib",
"//test/mocks/event:event_mocks",
"//test/mocks/server:server_mocks",
Expand Down
Loading

0 comments on commit 53f8944

Please sign in to comment.