-
Notifications
You must be signed in to change notification settings - Fork 130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[runtime] Add an in-memory cache for Benchmark protos. #263
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
# | ||
# This package implements the CompilerGym service runtime, which is the utility | ||
# code that creates RPC servers and dispatches to CompilationServices. | ||
load("@rules_cc//cc:defs.bzl", "cc_library") | ||
load("@rules_python//python:defs.bzl", "py_library") | ||
|
||
# Public Python target for the service runtime package. Its only source is the | ||
# package __init__.py; the benchmark cache is pulled in as a dependency. | ||
py_library( | ||
name = "runtime", | ||
srcs = ["__init__.py"], | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
":benchmark_cache", | ||
], | ||
) | ||
|
||
# Python implementation of the in-memory Benchmark cache. Outside this package | ||
# it is visible only to targets under //tests/service/runtime. | ||
py_library( | ||
name = "benchmark_cache", | ||
srcs = ["benchmark_cache.py"], | ||
visibility = ["//tests/service/runtime:__subpackages__"], | ||
deps = [ | ||
"//compiler_gym/service/proto", | ||
], | ||
) | ||
|
||
# C++ implementation of the in-memory Benchmark cache. Outside this package it | ||
# is visible only to targets under //tests/service/runtime. | ||
cc_library( | ||
name = "BenchmarkCache", | ||
srcs = ["BenchmarkCache.cc"], | ||
hdrs = ["BenchmarkCache.h"], | ||
visibility = ["//tests/service/runtime:__subpackages__"], | ||
deps = [ | ||
"//compiler_gym/service/proto:compiler_gym_service_cc", | ||
"@boost//:filesystem", | ||
"@com_github_grpc_grpc//:grpc++", | ||
"@glog", | ||
], | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Copyright (c) Facebook, Inc. and its affiliates. | ||
// | ||
// This source code is licensed under the MIT license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
#include "compiler_gym/service/runtime/BenchmarkCache.h" | ||
|
||
#include <glog/logging.h> | ||
|
||
using grpc::Status; | ||
using grpc::StatusCode; | ||
|
||
namespace compiler_gym::runtime { | ||
|
||
// Creates an empty cache that holds at most `maxSizeInBytes` bytes of
// benchmarks. When no random engine is supplied, a fresh one is seeded from
// std::random_device.
BenchmarkCache::BenchmarkCache(size_t maxSizeInBytes, std::optional<std::mt19937_64> rand)
    : rand_(rand ? *rand : std::mt19937_64(std::random_device()())),
      maxSizeInBytes_(maxSizeInBytes),
      sizeInBytes_(0) {}
|
||
// Looks up the benchmark cached under `uri`. Returns a pointer to the stored
// message, or nullptr when the URI is not in the cache.
const Benchmark* BenchmarkCache::get(const std::string& uri) const {
  const auto entry = benchmarks_.find(uri);
  return entry != benchmarks_.end() ? &entry->second : nullptr;
}
|
||
void BenchmarkCache::add(const Benchmark&& benchmark) { | ||
VLOG(3) << "Caching benchmark " << benchmark.uri() << ". Cache size = " << sizeInBytes() | ||
<< " bytes, " << size() << " items"; | ||
|
||
// Remove any existing value to keep the cache size consistent. | ||
const auto it = benchmarks_.find(benchmark.uri()); | ||
if (it != benchmarks_.end()) { | ||
const size_t replacedSize = it->second.ByteSizeLong(); | ||
benchmarks_.erase(it); | ||
sizeInBytes_ -= replacedSize; | ||
} | ||
|
||
const size_t size = benchmark.ByteSizeLong(); | ||
if (sizeInBytes() + size > maxSizeInBytes()) { | ||
if (size > maxSizeInBytes()) { | ||
LOG(WARNING) << "Adding new benchmark with size " << size | ||
<< " bytes exceeds total target cache size of " << maxSizeInBytes() << " bytes"; | ||
} else { | ||
VLOG(3) << "Adding new benchmark with size " << size << " bytes exceeds maximum size " | ||
<< maxSizeInBytes() << " bytes, " << this->size() << " items"; | ||
} | ||
evictToCapacity(); | ||
} | ||
|
||
benchmarks_.insert({benchmark.uri(), std::move(benchmark)}); | ||
sizeInBytes_ += size; | ||
} | ||
|
||
// Removes randomly chosen benchmarks until the tracked byte total is no | ||
// greater than the target, or until the cache is empty. When no target is | ||
// given, half of maxSizeInBytes() is used. | ||
void BenchmarkCache::evictToCapacity(std::optional<size_t> targetSize) { | ||
int evicted = 0; | ||
// Fall back to 50% of the configured maximum when the caller gave no target. | ||
targetSize = targetSize.has_value() ? targetSize : maxSizeInBytes() / 2; | ||
|
||
// The size() check keeps the loop from running on an empty map, where | ||
// benchmarks_.size() - 1 below would wrap around. | ||
while (size() && sizeInBytes() > targetSize) { | ||
// Select a benchmark randomly. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ignorable: I always thought the advantages of random over lru were that random doesn't need to keep the recently used list. If the data items are small that makes sense. Otherwise, isn't lru pretty much always better? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I figured that there are two use common cases for this cache: (1) in a tight loop over a couple hundred benchmarks that will all fit in cache (2) over a massive set of training programs where the chance of a cache hit is negligible. Given that, it didn't seem to me like LRU would be much of an advantage. I also found this interesting: "A random eviction policy degrades gracefully as the loop gets too big." from https://danluu.com/2choices-eviction/ Disclaimer: I know nothing about cache eviction policies and have utterly no idea what I'm talking about :) |
||
std::uniform_int_distribution<size_t> distribution(0, benchmarks_.size() - 1); | ||
size_t index = distribution(rand_); | ||
// Step from the first element to the chosen position in the map. | ||
auto iterator = std::next(std::begin(benchmarks_), index); | ||
|
||
// Evict the benchmark from the pool of loaded benchmarks. | ||
++evicted; | ||
// Read the entry's size before erasing it; the iterator is invalid afterwards. | ||
sizeInBytes_ -= iterator->second.ByteSizeLong(); | ||
benchmarks_.erase(iterator); | ||
} | ||
|
||
// Only log when something was actually removed. | ||
if (evicted) { | ||
VLOG(2) << "Evicted " << evicted << " benchmarks from cache. Benchmark cache " | ||
<< "size now " << sizeInBytes() << " bytes, " << benchmarks_.size() << " items"; | ||
} | ||
} | ||
|
||
void BenchmarkCache::setMaxSizeInBytes(size_t maxSizeInBytes) { | ||
maxSizeInBytes_ = maxSizeInBytes; | ||
evictToCapacity(maxSizeInBytes); | ||
} | ||
|
||
} // namespace compiler_gym::runtime |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// Copyright (c) Facebook, Inc. and its affiliates. | ||
// | ||
// This source code is licensed under the MIT license found in the | ||
// LICENSE file in the root directory of this source tree. | ||
#pragma once | ||
|
||
#include <grpcpp/grpcpp.h>

#include <cstddef>
#include <memory>
#include <mutex>
#include <optional>
#include <random>
#include <string>
#include <unordered_map>

#include "boost/filesystem.hpp"
#include "compiler_gym/service/proto/compiler_gym_service.pb.h"
|
||
namespace compiler_gym::runtime { | ||
|
||
// Default maximum cache size in bytes (512 MiB). Declared `inline` so that
// every translation unit including this header shares a single definition.
inline constexpr size_t kEvictionSizeInBytes = 512 * 1024 * 1024;
|
||
// An in-memory cache of Benchmark protocol buffers. | ||
// | ||
// This object caches Benchmark messages by URI. Once the cache reaches a | ||
// predetermined size, benchmarks are evicted randomly until the capacity is | ||
// reduced to 50%. | ||
class BenchmarkCache { | ||
public: | ||
BenchmarkCache(size_t maxSizeInBytes = kEvictionSizeInBytes, | ||
std::optional<std::mt19937_64> rand = std::nullopt); | ||
|
||
// The pointer set by benchmark is valid only until the next call to add(). | ||
const Benchmark* get(const std::string& uri) const; | ||
|
||
// Move-insert the given benchmark to the cache. | ||
void add(const Benchmark&& benchmark); | ||
|
||
inline size_t size() const { return benchmarks_.size(); }; | ||
inline size_t sizeInBytes() const { return sizeInBytes_; }; | ||
inline size_t maxSizeInBytes() const { return maxSizeInBytes_; }; | ||
|
||
void setMaxSizeInBytes(size_t maxSizeInBytes); | ||
|
||
// Evict benchmarks randomly to reduce the capacity to the given size. If | ||
// targetSizeInBytes is not provided, benchmarks are evicted to 50% of | ||
// maxSizeInBytes. | ||
void evictToCapacity(std::optional<size_t> targetSizeInBytes = std::nullopt); | ||
|
||
private: | ||
std::unordered_map<std::string, const Benchmark> benchmarks_; | ||
|
||
std::mt19937_64 rand_; | ||
size_t maxSizeInBytes_; | ||
size_t sizeInBytes_; | ||
}; | ||
|
||
} // namespace compiler_gym::runtime |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
import logging | ||
from typing import Dict, Optional | ||
|
||
import numpy as np | ||
|
||
from compiler_gym.service.proto import Benchmark | ||
|
||
# Default cache capacity in bytes: 512 MiB, the same value as
# kEvictionSizeInBytes in the C++ BenchmarkCache.
MAX_SIZE_IN_BYTES = 512 * 1024 * 1024
|
||
|
||
class BenchmarkCache:
    """A size-bounded, in-memory store of Benchmark messages keyed by URI.

    The cache tracks the combined :code:`ByteSize()` of the messages it holds.
    When inserting a message would take that total past the configured
    maximum, randomly chosen entries are discarded until the total is at most
    half of the maximum, and only then is the new message stored.
    """

    def __init__(
        self,
        max_size_in_bytes: int = MAX_SIZE_IN_BYTES,
        rng: Optional[np.random.Generator] = None,
        logger: Optional[logging.Logger] = None,
    ):
        """Create an empty cache.

        :param max_size_in_bytes: Size threshold that triggers eviction.
        :param rng: Generator used to choose eviction victims. A new default
            generator is created when omitted.
        :param logger: Destination for log messages. Defaults to the
            "compiler_gym" logger.
        """
        self._benchmarks: Dict[str, Benchmark] = {}
        self._size_in_bytes = 0
        self._max_size_in_bytes = max_size_in_bytes

        self.rng = rng or np.random.default_rng()
        self.logger = logger or logging.getLogger("compiler_gym")

    def __getitem__(self, uri: str) -> Benchmark:
        """Return the benchmark stored under the URI, or raise KeyError."""
        benchmark = self._benchmarks.get(uri)
        if benchmark is not None:
            return benchmark
        raise KeyError(uri)

    def __contains__(self, uri: str):
        """Return True if a benchmark is cached under the URI."""
        return uri in self._benchmarks

    def __setitem__(self, uri: str, benchmark: Benchmark):
        """Store a benchmark under the URI, evicting first if needed."""
        self.logger.debug(
            "Caching benchmark %s. Cache size = %d bytes, %d items",
            uri,
            self.size_in_bytes,
            self.size,
        )

        # Forget any previous entry for this URI so that the byte count never
        # includes both the old and the new message.
        if uri in self._benchmarks:
            stale_size = self._benchmarks[uri].ByteSize()
            self._benchmarks.pop(uri)
            self._size_in_bytes -= stale_size

        incoming_size = benchmark.ByteSize()
        if self.size_in_bytes + incoming_size > self.max_size_in_bytes:
            if incoming_size <= self.max_size_in_bytes:
                self.logger.debug(
                    "Adding new benchmark with size %d bytes "
                    "exceeds maximum size %d bytes, %d items",
                    incoming_size,
                    self.max_size_in_bytes,
                    self.size,
                )
            else:
                # The message alone is bigger than the whole cache. It is
                # still stored below, but make that visible in the log.
                self.logger.warning(
                    "Adding new benchmark with size %d bytes exceeds total "
                    "target cache size of %d bytes",
                    incoming_size,
                    self.max_size_in_bytes,
                )
            self.evict_to_capacity()

        self._size_in_bytes += incoming_size
        self._benchmarks[uri] = benchmark

    def evict_to_capacity(self, target_size_in_bytes: Optional[int] = None) -> None:
        """Randomly evict benchmarks until the cache is no larger than the
        target size. The target defaults to 50% of the maximum size.
        """
        if target_size_in_bytes is None:
            target_size_in_bytes = self.max_size_in_bytes // 2

        evicted = 0
        while self.size and self.size_in_bytes > target_size_in_bytes:
            victim = self.rng.choice(list(self._benchmarks.keys()))
            self._size_in_bytes -= self._benchmarks[victim].ByteSize()
            del self._benchmarks[victim]
            evicted += 1

        if not evicted:
            return

        self.logger.info(
            "Evicted %d benchmarks from cache. "
            "Benchmark cache size now %d bytes, %d items",
            evicted,
            self.size_in_bytes,
            self.size,
        )

    @property
    def size(self) -> int:
        """How many benchmarks are currently cached."""
        return len(self._benchmarks)

    @property
    def size_in_bytes(self) -> int:
        """Total size of the cached benchmarks, not counting any overhead of
        the cache itself.
        """
        return self._size_in_bytes

    @property
    def max_size_in_bytes(self) -> int:
        """The size threshold that triggers eviction."""
        return self._max_size_in_bytes

    @max_size_in_bytes.setter
    def max_size_in_bytes(self, value: int) -> None:
        """Change the maximum size, evicting until the cache fits within it."""
        self._max_size_in_bytes = value
        self.evict_to_capacity(target_size_in_bytes=value)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
load("@rules_cc//cc:defs.bzl", "cc_test") | ||
load("@rules_python//python:defs.bzl", "py_test") | ||
|
||
# Test target for the Python benchmark cache. | ||
py_test( | ||
name = "benchmark_cache_test", | ||
srcs = ["benchmark_cache_test.py"], | ||
deps = [ | ||
"//compiler_gym/service/proto", | ||
"//compiler_gym/service/runtime:benchmark_cache", | ||
"//tests:test_main", | ||
], | ||
) | ||
|
||
# Test target for the C++ BenchmarkCache, built against gtest. | ||
cc_test( | ||
name = "BenchmarkCacheTest", | ||
srcs = ["BenchmarkCacheTest.cc"], | ||
deps = [ | ||
"//compiler_gym/service/proto:compiler_gym_service_cc", | ||
"//compiler_gym/service/runtime:BenchmarkCache", | ||
"//tests:TestMain", | ||
"@gtest", | ||
], | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider (but not for long, you know what I'm like):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if that makes sense for this use case, because there is no sensible
`std::function<Benchmark(string url)> benchmarkCreatorThingie`
callback to supply. If the benchmark can't be found, we return an error to the frontend. Pseudo-code: