forked from envoyproxy/envoy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
stats: add symbol table for future stat name encoding (envoyproxy#3927)
Add symbol table implementation, to be used for encoding heap-allocated stat names in the future. The symbol table grows and shrinks dynamically with the number of unique string segments in it, and guarantees x = decode(encode(x)) for as long as at least one instance of each string segment in x exists. Implicitly splits on period-delimited stat names. Does not enforce stat name length or string segment length. Risk Level: Low, adds library but does not add dependencies on it anywhere. Testing: See //test/common/stats:symbol_table_test. Docs Changes: None Release Notes: None Signed-off-by: James Buckland <jbuckland@google.com>
- Loading branch information
Showing
7 changed files
with
489 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "envoy/common/pure.h" | ||
|
||
#include "absl/strings/string_view.h" | ||
|
||
namespace Envoy { | ||
namespace Stats { | ||
|
||
/** | ||
* Interface for storing a stat name. | ||
*/ | ||
class StatName { | ||
public: | ||
virtual ~StatName(){}; | ||
virtual std::string toString() const PURE; | ||
}; | ||
|
||
using StatNamePtr = std::unique_ptr<StatName>; | ||
|
||
/** | ||
* Interface for shortening and retrieving stat names. | ||
* | ||
* Guarantees that x = encode(x).toString() for any x. | ||
*/ | ||
class SymbolTable { | ||
public: | ||
virtual ~SymbolTable() {} | ||
|
||
/** | ||
* Encodes a stat name into a StatNamePtr. Expects the name to be period-delimited. | ||
* | ||
* @param name the stat name to encode. | ||
* @return StatNamePtr a unique_ptr to the StatName class encapsulating the symbol vector. | ||
*/ | ||
virtual StatNamePtr encode(absl::string_view name) PURE; | ||
|
||
/** | ||
* Returns the size of a SymbolTable, as measured in number of symbols stored. | ||
* @return size_t the size of the table. | ||
*/ | ||
virtual size_t size() const PURE; | ||
}; | ||
|
||
} // namespace Stats | ||
} // namespace Envoy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#include "common/stats/symbol_table_impl.h" | ||
|
||
#include <memory> | ||
#include <unordered_map> | ||
#include <vector> | ||
|
||
#include "common/common/assert.h" | ||
|
||
namespace Envoy { | ||
namespace Stats { | ||
|
||
// TODO(ambuc): There is a possible performance optimization here for avoiding the encoding of IPs, | ||
// if they appear in stat names. We don't want to waste time symbolizing an integer as an integer, | ||
// if we can help it. | ||
StatNamePtr SymbolTableImpl::encode(const absl::string_view name) { | ||
SymbolVec symbol_vec; | ||
std::vector<absl::string_view> name_vec = absl::StrSplit(name, '.'); | ||
symbol_vec.reserve(name_vec.size()); | ||
std::transform(name_vec.begin(), name_vec.end(), std::back_inserter(symbol_vec), | ||
[this](absl::string_view x) { return toSymbol(x); }); | ||
|
||
return std::make_unique<StatNameImpl>(symbol_vec, *this); | ||
} | ||
|
||
std::string SymbolTableImpl::decode(const SymbolVec& symbol_vec) const { | ||
std::vector<absl::string_view> name; | ||
name.reserve(symbol_vec.size()); | ||
std::transform(symbol_vec.begin(), symbol_vec.end(), std::back_inserter(name), | ||
[this](Symbol x) { return fromSymbol(x); }); | ||
return absl::StrJoin(name, "."); | ||
} | ||
|
||
void SymbolTableImpl::free(const SymbolVec& symbol_vec) { | ||
for (const Symbol symbol : symbol_vec) { | ||
auto decode_search = decode_map_.find(symbol); | ||
ASSERT(decode_search != decode_map_.end()); | ||
|
||
auto encode_search = encode_map_.find(decode_search->second); | ||
ASSERT(encode_search != encode_map_.end()); | ||
|
||
encode_search->second.ref_count_--; | ||
// If that was the last remaining client usage of the symbol, erase the the current | ||
// mappings and add the now-unused symbol to the reuse pool. | ||
if (encode_search->second.ref_count_ == 0) { | ||
decode_map_.erase(decode_search); | ||
encode_map_.erase(encode_search); | ||
pool_.push(symbol); | ||
} | ||
} | ||
} | ||
|
||
Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { | ||
Symbol result; | ||
auto encode_find = encode_map_.find(sv); | ||
// If the string segment doesn't already exist, | ||
if (encode_find == encode_map_.end()) { | ||
// We create the actual string, place it in the decode_map_, and then insert a string_view | ||
// pointing to it in the encode_map_. This allows us to only store the string once. | ||
std::string str = std::string(sv); | ||
|
||
auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)}); | ||
ASSERT(decode_insert.second); | ||
|
||
auto encode_insert = encode_map_.insert( | ||
{decode_insert.first->second, {.symbol_ = next_symbol_, .ref_count_ = 1}}); | ||
ASSERT(encode_insert.second); | ||
|
||
result = next_symbol_; | ||
newSymbol(); | ||
} else { | ||
// If the insertion didn't take place, return the actual value at that location and up the | ||
// refcount at that location | ||
result = encode_find->second.symbol_; | ||
++(encode_find->second.ref_count_); | ||
} | ||
return result; | ||
} | ||
|
||
absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const { | ||
auto search = decode_map_.find(symbol); | ||
ASSERT(search != decode_map_.end()); | ||
return search->second; | ||
} | ||
|
||
} // namespace Stats | ||
} // namespace Envoy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
#pragma once | ||
|
||
#include <algorithm> | ||
#include <memory> | ||
#include <stack> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <vector> | ||
|
||
#include "envoy/common/exception.h" | ||
#include "envoy/stats/symbol_table.h" | ||
|
||
#include "common/common/assert.h" | ||
#include "common/common/utility.h" | ||
|
||
#include "absl/strings/str_join.h" | ||
#include "absl/strings/str_split.h" | ||
|
||
namespace Envoy { | ||
namespace Stats { | ||
|
||
using Symbol = uint32_t; | ||
using SymbolVec = std::vector<Symbol>; | ||
|
||
/** | ||
* Underlying SymbolTableImpl implementation which manages per-symbol reference counting. | ||
* | ||
* The underlying Symbol / SymbolVec data structures are private to the impl. One side | ||
* effect of the non-monotonically-increasing symbol counter is that if a string is encoded, the | ||
* resulting stat is destroyed, and then that same string is re-encoded, it may or may not encode to | ||
* the same underlying symbol. | ||
*/ | ||
class SymbolTableImpl : public SymbolTable { | ||
public: | ||
StatNamePtr encode(absl::string_view name) override; | ||
|
||
// For testing purposes only. | ||
size_t size() const override { | ||
ASSERT(encode_map_.size() == decode_map_.size()); | ||
return encode_map_.size(); | ||
} | ||
|
||
private: | ||
friend class StatNameImpl; | ||
friend class StatNameTest; | ||
|
||
struct SharedSymbol { | ||
Symbol symbol_; | ||
uint32_t ref_count_; | ||
}; | ||
|
||
/** | ||
* Decodes a vector of symbols back into its period-delimited stat name. | ||
* If decoding fails on any part of the symbol_vec, we release_assert and crash hard, since this | ||
* should never happen, and we don't want to continue running with a corrupt stats set. | ||
* | ||
* @param symbol_vec the vector of symbols to decode. | ||
* @return std::string the retrieved stat name. | ||
*/ | ||
std::string decode(const SymbolVec& symbol_vec) const; | ||
|
||
/** | ||
* Since SymbolTableImpl does manual reference counting, a client of SymbolTable (such as | ||
* StatName) must manually call free(symbol_vec) when it is freeing the stat it represents. This | ||
* way, the symbol table will grow and shrink dynamically, instead of being write-only. | ||
* | ||
* @param symbol_vec the vector of symbols to be freed. | ||
*/ | ||
void free(const SymbolVec& symbol_vec); | ||
|
||
/** | ||
* Convenience function for encode(), symbolizing one string segment at a time. | ||
* | ||
* @param sv the individual string to be encoded as a symbol. | ||
* @return Symbol the encoded string. | ||
*/ | ||
Symbol toSymbol(absl::string_view sv); | ||
|
||
/** | ||
* Convenience function for decode(), decoding one symbol at a time. | ||
* | ||
* @param symbol the individual symbol to be decoded. | ||
* @return absl::string_view the decoded string. | ||
*/ | ||
absl::string_view fromSymbol(Symbol symbol) const; | ||
|
||
// Stages a new symbol for use. To be called after a successful insertion. | ||
void newSymbol() { | ||
if (pool_.empty()) { | ||
next_symbol_ = ++monotonic_counter_; | ||
} else { | ||
next_symbol_ = pool_.top(); | ||
pool_.pop(); | ||
} | ||
// This should catch integer overflow for the new symbol. | ||
ASSERT(monotonic_counter_ != 0); | ||
} | ||
|
||
Symbol monotonicCounter() { return monotonic_counter_; } | ||
|
||
// Stores the symbol to be used at next insertion. This should exist ahead of insertion time so | ||
// that if insertion succeeds, the value written is the correct one. | ||
Symbol next_symbol_ = 0; | ||
|
||
// If the free pool is exhausted, we monotonically increase this counter. | ||
Symbol monotonic_counter_ = 0; | ||
|
||
// Bimap implementation. | ||
// The encode map stores both the symbol and the ref count of that symbol. | ||
// Using absl::string_view lets us only store the complete string once, in the decode map. | ||
std::unordered_map<absl::string_view, SharedSymbol, StringViewHash> encode_map_; | ||
std::unordered_map<Symbol, std::string> decode_map_; | ||
|
||
// Free pool of symbols for re-use. | ||
// TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols | ||
// using an Envoy::IntervalSet. | ||
std::stack<Symbol> pool_; | ||
}; | ||
|
||
/** | ||
* Implements RAII for Symbols, since the StatName destructor does the work of freeing its component | ||
* symbols. | ||
*/ | ||
class StatNameImpl : public StatName { | ||
public: | ||
StatNameImpl(SymbolVec symbol_vec, SymbolTableImpl& symbol_table) | ||
: symbol_vec_(symbol_vec), symbol_table_(symbol_table) {} | ||
~StatNameImpl() override { symbol_table_.free(symbol_vec_); } | ||
std::string toString() const override { return symbol_table_.decode(symbol_vec_); } | ||
|
||
private: | ||
friend class StatNameTest; | ||
SymbolVec symbolVec() { return symbol_vec_; } | ||
SymbolVec symbol_vec_; | ||
SymbolTableImpl& symbol_table_; | ||
}; | ||
|
||
} // namespace Stats | ||
} // namespace Envoy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.