From 00b763566b71834aac48a2fda3ebf292e3ad285b Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Wed, 10 Jan 2024 16:24:03 -0800 Subject: [PATCH] [cas/libclang] Add libclang APIs for restricting the size of the local CAS directory The new APIs allow the client to: * Get the current size * Set a size limit in bytes * Prune data from the directory The client decides how to derive the proper size limit. rdar://121129053 --- clang/include/clang-c/CAS.h | 34 +++++++++ clang/test/CAS/libclang-prune-data.c | 63 ++++++++++++++++ clang/tools/c-index-test/core_main.cpp | 56 +++++++++++++++ clang/tools/libclang/CCAS.cpp | 72 ++++++++++++++++--- clang/tools/libclang/libclang.map | 3 + llvm/include/llvm-c/CAS/PluginAPI_functions.h | 33 +++++++++ llvm/include/llvm/CAS/ObjectStore.h | 22 ++++++ llvm/include/llvm/CAS/UnifiedOnDiskCache.h | 11 ++- llvm/lib/CAS/OnDiskCAS.cpp | 15 ++++ llvm/lib/CAS/PluginAPI.h | 7 ++ llvm/lib/CAS/PluginAPI_functions.def | 3 + llvm/lib/CAS/PluginCAS.cpp | 38 ++++++++++ llvm/lib/CAS/UnifiedOnDiskCache.cpp | 18 +++-- .../libCASPluginTest/libCASPluginTest.cpp | 27 +++++++ .../libCASPluginTest/libCASPluginTest.exports | 3 + llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp | 10 ++- 16 files changed, 399 insertions(+), 16 deletions(-) create mode 100644 clang/test/CAS/libclang-prune-data.c diff --git a/clang/include/clang-c/CAS.h b/clang/include/clang-c/CAS.h index 04b06ef6297219..be4b5f6849fdd2 100644 --- a/clang/include/clang-c/CAS.h +++ b/clang/include/clang-c/CAS.h @@ -24,6 +24,7 @@ #include "clang-c/CXString.h" #include "clang-c/Platform.h" #include +#include #ifdef __cplusplus extern "C" { @@ -118,6 +119,39 @@ clang_experimental_cas_Databases_create(CXCASOptions Opts, CXString *Error); */ CINDEX_LINKAGE void clang_experimental_cas_Databases_dispose(CXCASDatabases); +/** + * Get the local storage size of the CAS/cache data in bytes. + * + * \param[out] OutError The error object to pass back to client (if any). 
+ * If non-null the object must be disposed using \c clang_Error_dispose. + * \returns the local storage size of the CAS/cache data, or -1 if the + * implementation does not support reporting such size, or -2 if an error + * occurred. + */ +CINDEX_LINKAGE int64_t clang_experimental_cas_Databases_get_storage_size( + CXCASDatabases, CXError *OutError); + +/** + * Set the size for limiting disk storage growth. + * + * \param size_limit the maximum size limit in bytes. 0 means no limit. Negative + * values are invalid. + * \returns an error object if there was an error, NULL otherwise. + * If non-null the object must be disposed using \c clang_Error_dispose. + */ +CINDEX_LINKAGE CXError clang_experimental_cas_Databases_set_size_limit( + CXCASDatabases, int64_t size_limit); + +/** + * Prune local storage to reduce its size according to the desired size limit. + * Pruning can happen concurrently with other operations. + * + * \returns an error object if there was an error, NULL otherwise. + * If non-null the object must be disposed using \c clang_Error_dispose. + */ +CINDEX_LINKAGE +CXError clang_experimental_cas_Databases_prune_ondisk_data(CXCASDatabases); + /** * Loads an object using its printed \p CASID. * diff --git a/clang/test/CAS/libclang-prune-data.c b/clang/test/CAS/libclang-prune-data.c new file mode 100644 index 00000000000000..1a2fad431393a2 --- /dev/null +++ b/clang/test/CAS/libclang-prune-data.c @@ -0,0 +1,63 @@ +// REQUIRES: ondisk_cas + +// Tests that the CAS directory storage can be limited via libclang APIs. +// The test depends on internal details of the CAS directory structure. + +// RUN: rm -rf %t && mkdir -p %t + +// RUN: %clang -cc1depscan -fdepscan=inline -fdepscan-include-tree -o %t/t.rsp -cc1-args \ +// RUN: -cc1 -triple x86_64-apple-macos12 -fcas-path %t/cas -emit-obj %s -o %t/output.o +// RUN: %clang @%t/t.rsp +// RUN: ls %t/cas | wc -l | grep 2 +// RUN: ls %t/cas | grep v1.1 + +// Limit too high, no change. 
+// RUN: c-index-test core -prune-cas -cas-path %t/cas 100000000 +// RUN: ls %t/cas | wc -l | grep 2 + +// Under the limit, starts a chain. +// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 +// RUN: ls %t/cas | wc -l | grep 3 +// RUN: ls %t/cas | grep v1.2 + +// Under the limit, starts a chain and abandons oldest dir. +// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 +// RUN: ls %t/cas | wc -l | grep 4 +// RUN: ls %t/cas | grep v1.3 + +// Under the limit, removes abandoned dir, starts a chain and abandons oldest dir. +// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 +// RUN: ls %t/cas | wc -l | grep 4 +// RUN: ls %t/cas | grep v1.4 +// RUN: ls %t/cas | grep -v v1.1 + +// Same test but using the plugin CAS. + +// RUN: rm -rf %t/cas + +// RUN: %clang -cc1depscan -fdepscan=inline -fdepscan-include-tree -o %t/t.rsp -cc1-args \ +// RUN: -cc1 -triple x86_64-apple-macos12 -fcas-path %t/cas -emit-obj %s -o %t/output.o \ +// RUN: -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext +// RUN: %clang @%t/t.rsp +// RUN: ls %t/cas | wc -l | grep 2 +// RUN: ls %t/cas | grep v1.1 + +// Limit too high, no change. +// RUN: c-index-test core -prune-cas -cas-path %t/cas 100000000 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext +// RUN: ls %t/cas | wc -l | grep 2 + +// Under the limit, starts a chain. +// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext +// RUN: ls %t/cas | wc -l | grep 3 +// RUN: ls %t/cas | grep v1.2 + +// Under the limit, starts a chain and abandons oldest dir. +// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext +// RUN: ls %t/cas | wc -l | grep 4 +// RUN: ls %t/cas | grep v1.3 + +// Under the limit, removes abandoned dir, starts a chain and abandons oldest dir. 
+// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext +// RUN: ls %t/cas | wc -l | grep 4 +// RUN: ls %t/cas | grep v1.4 +// RUN: ls %t/cas | grep -v v1.1 diff --git a/clang/tools/c-index-test/core_main.cpp b/clang/tools/c-index-test/core_main.cpp index f1fe1fd4d50dc1..5700ae4902792d 100644 --- a/clang/tools/c-index-test/core_main.cpp +++ b/clang/tools/c-index-test/core_main.cpp @@ -60,6 +60,7 @@ enum class ActionType { ScanDepsByModuleName, MaterializeCachedJob, ReplayCachedJob, + PruneCAS, WatchDir, }; @@ -88,6 +89,7 @@ Action(cl::desc("Action:"), cl::init(ActionType::None), "Materialize cached compilation data from upstream CAS"), clEnumValN(ActionType::ReplayCachedJob, "replay-cached-job", "Replay a cached compilation from the CAS"), + clEnumValN(ActionType::PruneCAS, "prune-cas", "Prune CAS data"), clEnumValN(ActionType::WatchDir, "watch-dir", "Watch directory for file events")), cl::cat(IndexTestCoreCategory)); @@ -990,6 +992,43 @@ static int replayCachedJob(ArrayRef Args, return 0; } +static int pruneCAS(int64_t Limit, CXCASDatabases DBs) { + CXError Err = nullptr; + int64_t Size = clang_experimental_cas_Databases_get_storage_size(DBs, &Err); + if (Size == -2) { + llvm::errs() << "clang_experimental_cas_Databases_get_storage_size: " + << clang_Error_getDescription(Err) << "\n"; + clang_Error_dispose(Err); + return 1; + } + if (Size == -1) { + llvm::errs() + << "unsupported clang_experimental_cas_Databases_get_storage_size"; + return 1; + } + if (Size == 0) { + llvm::errs() + << "clang_experimental_cas_Databases_get_storage_size returned 0"; + return 1; + } + + if (CXError Err = + clang_experimental_cas_Databases_set_size_limit(DBs, Limit)) { + llvm::errs() << "clang_experimental_cas_Databases_set_size_limit: " + << clang_Error_getDescription(Err) << "\n"; + clang_Error_dispose(Err); + return 1; + } + if (CXError Err = clang_experimental_cas_Databases_prune_ondisk_data(DBs)) { + llvm::errs() << 
"clang_experimental_cas_Databases_prune_ondisk_data: " + << clang_Error_getDescription(Err) << "\n"; + clang_Error_dispose(Err); + return 1; + } + + return 0; +} + static void printSymbol(const IndexRecordDecl &Rec, raw_ostream &OS) { printSymbolInfo(Rec.SymInfo, OS); OS << " | "; @@ -1379,6 +1418,23 @@ int indextest_core_main(int argc, const char **argv) { options::InputFiles[0], DBs); } + if (options::Action == ActionType::PruneCAS) { + if (options::InputFiles.empty()) { + errs() << "error: missing size limit\n"; + return 1; + } + int64_t Limit; + if (StringRef(options::InputFiles[0]).getAsInteger(10, Limit)) { + errs() << "error: size limit not an integer\n"; + return 1; + } + if (!DBs) { + errs() << "error: CAS was not configured\n"; + return 1; + } + return pruneCAS(Limit, DBs); + } + if (options::Action == ActionType::WatchDir) { if (options::InputFiles.empty()) { errs() << "error: missing directory path\n"; diff --git a/clang/tools/libclang/CCAS.cpp b/clang/tools/libclang/CCAS.cpp index 2458782acde0b9..763261791dbc2b 100644 --- a/clang/tools/libclang/CCAS.cpp +++ b/clang/tools/libclang/CCAS.cpp @@ -58,14 +58,20 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(WrappedReplayResult, CXCASReplayResult) } // anonymous namespace +static void passAsCXError(Error &&E, CXError *OutError) { + if (OutError) + *OutError = cxerror::create(std::move(E)); + else + llvm::consumeError(std::move(E)); +} + CXCASCachedCompilation WrappedCachedCompilation::fromResultID( Expected> ResultID, CASID CacheKey, const std::shared_ptr &CAS, const std::shared_ptr &AC, CXError *OutError) { auto failure = [OutError](Error &&E) -> CXCASCachedCompilation { - if (OutError) - *OutError = cxerror::create(std::move(E)); + passAsCXError(std::move(E), OutError); return nullptr; }; @@ -139,6 +145,57 @@ void clang_experimental_cas_Databases_dispose(CXCASDatabases CDBs) { delete unwrap(CDBs); } +int64_t clang_experimental_cas_Databases_get_storage_size(CXCASDatabases CDBs, + CXError *OutError) { + // Commonly 
used ObjectStore implementations (on-disk and plugin) combine a + // CAS and action-cache into a single directory managing the storage + // holistically for both, so calling the ObjectStore API is sufficient. + // FIXME: For completeness we should figure out how to deal with potential + // implementations that use separate directories for CAS and action-cache. + std::optional Size; + if (Error E = unwrap(CDBs)->CAS->getStorageSize().moveInto(Size)) { + passAsCXError(std::move(E), OutError); + return -2; + } + if (!Size) + return -1; + return *Size; +} + +CXError clang_experimental_cas_Databases_set_size_limit(CXCASDatabases CDBs, + int64_t size_limit) { + // Commonly used ObjectStore implementations (on-disk and plugin) combine a + // CAS and action-cache into a single directory managing the storage + // holistically for both, so calling the ObjectStore API is sufficient. + // FIXME: For completeness we should figure out how to deal with potential + // implementations that use separate directories for CAS and action-cache. + std::optional SizeLimit; + if (size_limit < 0) { + return cxerror::create(llvm::createStringError( + llvm::inconvertibleErrorCode(), + "invalid size limit passed to " + "clang_experimental_cas_Databases_set_size_limit")); + } + if (size_limit > 0) { + SizeLimit = size_limit; + } + if (Error E = unwrap(CDBs)->CAS->setSizeLimit(SizeLimit)) + return cxerror::create(std::move(E)); + return nullptr; +} + +CXError +clang_experimental_cas_Databases_prune_ondisk_data(CXCASDatabases CDBs) { + // Commonly used ObjectStore implementations (on-disk and plugin) combine a + // CAS and action-cache into a single directory managing the storage + // holistically for both, so calling the ObjectStore API is sufficient. + // FIXME: For completeness we should figure out how to deal with potential + // implementations that use separate directories for CAS and action-cache. 
+ if (Error E = unwrap(CDBs)->CAS->pruneStorageData()) + return cxerror::create(std::move(E)); + return nullptr; +} + CXCASObject clang_experimental_cas_loadObjectByString(CXCASDatabases CDBs, const char *PrintedID, CXError *OutError) { @@ -149,8 +206,7 @@ CXCASObject clang_experimental_cas_loadObjectByString(CXCASDatabases CDBs, *OutError = nullptr; auto failure = [OutError](Error &&E) -> CXCASObject { - if (OutError) - *OutError = cxerror::create(std::move(E)); + passAsCXError(std::move(E), OutError); return nullptr; }; @@ -311,8 +367,7 @@ clang_experimental_cas_getCachedCompilation(CXCASDatabases CDBs, *OutError = nullptr; auto failure = [OutError](Error &&E) -> CXCASCachedCompilation { - if (OutError) - *OutError = cxerror::create(std::move(E)); + passAsCXError(std::move(E), OutError); return nullptr; }; @@ -441,10 +496,7 @@ CXCASReplayResult clang_experimental_cas_replayCompilation( std::move(Invok), WorkingDirectory, WComp.CacheKey, WComp.CachedResult, DiagText) .moveInto(Ret)) { - if (OutError) - *OutError = cxerror::create(std::move(E)); - else - llvm::consumeError(std::move(E)); + passAsCXError(std::move(E), OutError); return nullptr; } diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map index dbb76739355205..a2d859ce06bd96 100644 --- a/clang/tools/libclang/libclang.map +++ b/clang/tools/libclang/libclang.map @@ -486,6 +486,9 @@ LLVM_16 { clang_experimental_cas_CASObject_dispose; clang_experimental_cas_Databases_create; clang_experimental_cas_Databases_dispose; + clang_experimental_cas_Databases_get_storage_size; + clang_experimental_cas_Databases_prune_ondisk_data; + clang_experimental_cas_Databases_set_size_limit; clang_experimental_cas_getCachedCompilation; clang_experimental_cas_getCachedCompilation_async; clang_experimental_cas_loadObjectByString; diff --git a/llvm/include/llvm-c/CAS/PluginAPI_functions.h b/llvm/include/llvm-c/CAS/PluginAPI_functions.h index 8a7c9a64597ca1..f6ce65dd44470c 100644 --- 
a/llvm/include/llvm-c/CAS/PluginAPI_functions.h +++ b/llvm/include/llvm-c/CAS/PluginAPI_functions.h @@ -98,6 +98,39 @@ LLCAS_PUBLIC llcas_cas_t llcas_cas_create(llcas_cas_options_t, char **error); */ LLCAS_PUBLIC void llcas_cas_dispose(llcas_cas_t); +/** + * Get the local storage size of the CAS/cache data in bytes. + * + * \param error optional pointer to receive an error message if an error + * occurred. If set, the memory it points to needs to be released via + * \c llcas_string_dispose. + * \returns the local storage size of the CAS/cache data, or -1 if the + * implementation does not support reporting such size, or -2 if an error + * occurred. + */ +LLCAS_PUBLIC int64_t llcas_cas_get_ondisk_size(llcas_cas_t, char **error); + +/** + * Set the size for limiting disk storage growth. + * + * \param size_limit the maximum size limit in bytes. 0 means no limit. Negative + * values are invalid. + * \param error optional pointer to receive an error message if an error + * occurred. If set, the memory it points to needs to be released via + * \c llcas_string_dispose. + * \returns true if there was an error, false otherwise. + */ +LLCAS_PUBLIC bool +llcas_cas_set_ondisk_size_limit(llcas_cas_t, int64_t size_limit, char **error); + +/** + * Prune local storage to reduce its size according to the desired size limit. + * Pruning can happen concurrently with other operations. + * + * \returns true if there was an error, false otherwise. + */ +LLCAS_PUBLIC bool llcas_cas_prune_ondisk_data(llcas_cas_t, char **error); + /** * \returns the hash schema name that the plugin is using. The string memory it * points to needs to be released via \c llcas_string_dispose. 
diff --git a/llvm/include/llvm/CAS/ObjectStore.h b/llvm/include/llvm/CAS/ObjectStore.h index 4ddd9a952114e3..21599c8f3d1114 100644 --- a/llvm/include/llvm/CAS/ObjectStore.h +++ b/llvm/include/llvm/CAS/ObjectStore.h @@ -272,6 +272,28 @@ class ObjectStore { return Data.size(); } + /// Set the size for limiting growth of on-disk storage. This has an effect + /// for when the instance is closed. + /// + /// Implementations may not have this implemented. + virtual Error setSizeLimit(std::optional SizeLimit) { + return Error::success(); + } + + /// \returns the storage size of the on-disk CAS data. + /// + /// Implementations that don't have an implementation for this should return + /// \p std::nullopt. + virtual Expected> getStorageSize() const { + return std::nullopt; + } + + /// Prune local storage to reduce its size according to the desired size + /// limit. Pruning can happen concurrently with other operations. + /// + /// Implementations may not have this implemented. + virtual Error pruneStorageData() { return Error::success(); } + /// Validate the whole node tree. Error validateTree(ObjectRef Ref); diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index 9c076cdf5fd6e3..7681a66fa84fbf 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -91,6 +91,13 @@ class UnifiedOnDiskCache { /// \p SizeLimit was passed to the \p open call. Error close(bool CheckSizeLimit = true); + /// Set the size for limiting growth. This has an effect for when the instance + /// is closed. + void setSizeLimit(std::optional SizeLimit); + + /// \returns the storage size of the cache data. + uint64_t getStorageSize() const; + /// \returns whether the primary store has exceeded the intended size limit. /// This can return false even if the overall size of the opened directory is /// over the \p SizeLimit passed to \p open. 
To know whether garbage @@ -112,6 +119,8 @@ class UnifiedOnDiskCache { /// background, so that it has minimal effect on the workload of the process. static Error collectGarbage(StringRef Path); + Error collectGarbage(); + ~UnifiedOnDiskCache(); private: @@ -121,7 +130,7 @@ class UnifiedOnDiskCache { faultInFromUpstreamKV(ArrayRef Key); std::string RootPath; - std::optional SizeLimit; + std::atomic SizeLimit; int LockFD = -1; diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp index 98fb934db26d3c..691378ef6c74e2 100644 --- a/llvm/lib/CAS/OnDiskCAS.cpp +++ b/llvm/lib/CAS/OnDiskCAS.cpp @@ -68,6 +68,10 @@ class OnDiskCAS : public BuiltinCAS { Error forEachRef(ObjectHandle Node, function_ref Callback) const final; + Error setSizeLimit(std::optional SizeLimit) final; + Expected> getStorageSize() const final; + Error pruneStorageData() final; + OnDiskCAS(std::unique_ptr DB_) : OwnedDB(std::move(DB_)), DB(OwnedDB.get()) {} @@ -137,6 +141,17 @@ Error OnDiskCAS::forEachRef(ObjectHandle Node, return Error::success(); } +Error OnDiskCAS::setSizeLimit(std::optional SizeLimit) { + UniDB->setSizeLimit(SizeLimit); + return Error::success(); +} + +Expected> OnDiskCAS::getStorageSize() const { + return UniDB->getStorageSize(); +} + +Error OnDiskCAS::pruneStorageData() { return UniDB->collectGarbage(); } + Expected> OnDiskCAS::open(StringRef AbsPath) { Expected> DB = ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(), diff --git a/llvm/lib/CAS/PluginAPI.h b/llvm/lib/CAS/PluginAPI.h index b9505677e6c11c..d691ece6b1e95b 100644 --- a/llvm/lib/CAS/PluginAPI.h +++ b/llvm/lib/CAS/PluginAPI.h @@ -34,6 +34,13 @@ struct llcas_functions_t { void (*cas_dispose)(llcas_cas_t); + int64_t (*cas_get_ondisk_size)(llcas_cas_t, char **error); + + bool (*cas_set_ondisk_size_limit)(llcas_cas_t, int64_t size_limit, + char **error); + + bool (*cas_prune_ondisk_data)(llcas_cas_t, char **error); + unsigned (*digest_parse)(llcas_cas_t, const char *printed_digest, uint8_t 
*bytes, size_t bytes_size, char **error); diff --git a/llvm/lib/CAS/PluginAPI_functions.def b/llvm/lib/CAS/PluginAPI_functions.def index 17e60e510b2fcf..6da6f7473bb6ab 100644 --- a/llvm/lib/CAS/PluginAPI_functions.def +++ b/llvm/lib/CAS/PluginAPI_functions.def @@ -12,6 +12,7 @@ CASPLUGINAPI_FUNCTION(cas_create, true) CASPLUGINAPI_FUNCTION(cas_dispose, true) CASPLUGINAPI_FUNCTION(cas_get_hash_schema_name, true) CASPLUGINAPI_FUNCTION(cas_get_objectid, true) +CASPLUGINAPI_FUNCTION(cas_get_ondisk_size, false) CASPLUGINAPI_FUNCTION(cas_load_object, true) CASPLUGINAPI_FUNCTION(cas_load_object_async, true) CASPLUGINAPI_FUNCTION(cas_options_create, true) @@ -19,6 +20,8 @@ CASPLUGINAPI_FUNCTION(cas_options_dispose, true) CASPLUGINAPI_FUNCTION(cas_options_set_client_version, true) CASPLUGINAPI_FUNCTION(cas_options_set_ondisk_path, true) CASPLUGINAPI_FUNCTION(cas_options_set_option, true) +CASPLUGINAPI_FUNCTION(cas_prune_ondisk_data, false) +CASPLUGINAPI_FUNCTION(cas_set_ondisk_size_limit, false) CASPLUGINAPI_FUNCTION(cas_store_object, true) CASPLUGINAPI_FUNCTION(digest_parse, true) CASPLUGINAPI_FUNCTION(digest_print, true) diff --git a/llvm/lib/CAS/PluginCAS.cpp b/llvm/lib/CAS/PluginCAS.cpp index 5fb377300cb797..fe90bddb917a4f 100644 --- a/llvm/lib/CAS/PluginCAS.cpp +++ b/llvm/lib/CAS/PluginCAS.cpp @@ -148,6 +148,10 @@ class PluginObjectStore return Error::success(); } + Error setSizeLimit(std::optional SizeLimit) final; + Expected> getStorageSize() const final; + Error pruneStorageData() final; + PluginObjectStore(std::shared_ptr); std::shared_ptr Ctx; @@ -362,6 +366,40 @@ ArrayRef PluginObjectStore::getData(ObjectHandle Node, return ArrayRef((const char *)c_data.data, c_data.size); } +Error PluginObjectStore::setSizeLimit(std::optional SizeLimit) { + if (Ctx->Functions.cas_set_ondisk_size_limit) { + char *c_err = nullptr; + if (Ctx->Functions.cas_set_ondisk_size_limit(Ctx->c_cas, + SizeLimit.value_or(0), &c_err)) + return Ctx->errorAndDispose(c_err); + } + return 
Error::success(); +} + +Expected> PluginObjectStore::getStorageSize() const { + if (!Ctx->Functions.cas_get_ondisk_size) + return std::nullopt; + char *c_err = nullptr; + int64_t ret = Ctx->Functions.cas_get_ondisk_size(Ctx->c_cas, &c_err); + switch (ret) { + case -1: + return std::nullopt; + case -2: + return Ctx->errorAndDispose(c_err); + default: + return ret; + } +} + +Error PluginObjectStore::pruneStorageData() { + if (Ctx->Functions.cas_prune_ondisk_data) { + char *c_err = nullptr; + if (Ctx->Functions.cas_prune_ondisk_data(Ctx->c_cas, &c_err)) + return Ctx->errorAndDispose(c_err); + } + return Error::success(); +} + PluginObjectStore::PluginObjectStore(std::shared_ptr CASCtx) : ObjectStore(*CASCtx), Ctx(std::move(CASCtx)) {} diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index c71af6dc540507..e256f4abf9cc8f 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -234,7 +234,7 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, auto UniDB = std::unique_ptr(new UnifiedOnDiskCache()); UniDB->RootPath = RootPath; - UniDB->SizeLimit = SizeLimit; + UniDB->SizeLimit = SizeLimit.value_or(0); UniDB->LockFD = LockFD; UniDB->NeedsGarbageCollection = DBDirs.size() > 2; UniDB->PrimaryDBDir = PrimaryDir; @@ -246,8 +246,17 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, return std::move(UniDB); } +void UnifiedOnDiskCache::setSizeLimit(std::optional SizeLimit) { + this->SizeLimit = SizeLimit.value_or(0); +} + +uint64_t UnifiedOnDiskCache::getStorageSize() const { + return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize(); +} + bool UnifiedOnDiskCache::hasExceededSizeLimit() const { - if (!SizeLimit) + uint64_t CurSizeLimit = SizeLimit; + if (!CurSizeLimit) return false; // We allow each of the directories in the chain to reach up to half the // intended size limit. 
Check whether the primary directory has exceeded half @@ -259,8 +268,7 @@ bool UnifiedOnDiskCache::hasExceededSizeLimit() const { // the primary has reached its own limit. Essentially in such situation we // prefer reclaiming the storage later in order to have more consistent cache // hits behavior. - return (*SizeLimit / 2) < - (PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize()); + return (CurSizeLimit / 2) < getStorageSize(); } Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { @@ -337,3 +345,5 @@ Error UnifiedOnDiskCache::collectGarbage(StringRef Path) { } return Error::success(); } + +Error UnifiedOnDiskCache::collectGarbage() { return collectGarbage(RootPath); } diff --git a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp index b7bfde7f21a0b1..a53b287e6b39bc 100644 --- a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp +++ b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp @@ -291,6 +291,33 @@ llcas_cas_t llcas_cas_create(llcas_cas_options_t c_opts, char **error) { void llcas_cas_dispose(llcas_cas_t c_cas) { delete unwrap(c_cas); } +int64_t llcas_cas_get_ondisk_size(llcas_cas_t c_cas, char **error) { + return unwrap(c_cas)->DB->getStorageSize(); +} + +bool llcas_cas_set_ondisk_size_limit(llcas_cas_t c_cas, int64_t size_limit, + char **error) { + std::optional SizeLimit; + if (size_limit < 0) { + return reportError( + llvm::createStringError( + llvm::inconvertibleErrorCode(), + "invalid size limit passed to llcas_cas_set_ondisk_size_limit"), + error, true); + } + if (size_limit > 0) { + SizeLimit = size_limit; + } + unwrap(c_cas)->DB->setSizeLimit(SizeLimit); + return false; +} + +bool llcas_cas_prune_ondisk_data(llcas_cas_t c_cas, char **error) { + if (Error E = unwrap(c_cas)->DB->collectGarbage()) + return reportError(std::move(E), error, true); + return false; +} + void llcas_cas_options_set_client_version(llcas_cas_options_t, unsigned major, unsigned minor) { // Ignore for now. 
diff --git a/llvm/tools/libCASPluginTest/libCASPluginTest.exports b/llvm/tools/libCASPluginTest/libCASPluginTest.exports index 8fda2c5559c92f..07478da1e1e886 100644 --- a/llvm/tools/libCASPluginTest/libCASPluginTest.exports +++ b/llvm/tools/libCASPluginTest/libCASPluginTest.exports @@ -7,6 +7,7 @@ llcas_cas_create llcas_cas_dispose llcas_cas_get_hash_schema_name llcas_cas_get_objectid +llcas_cas_get_ondisk_size llcas_cas_load_object llcas_cas_load_object_async llcas_cas_options_create @@ -14,6 +15,8 @@ llcas_cas_options_dispose llcas_cas_options_set_client_version llcas_cas_options_set_ondisk_path llcas_cas_options_set_option +llcas_cas_prune_ondisk_data +llcas_cas_set_ondisk_size_limit llcas_cas_store_object llcas_digest_parse llcas_digest_print diff --git a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp index eafa07ff0f4d27..dc83125be96086 100644 --- a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp +++ b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp @@ -47,9 +47,9 @@ TEST(UnifiedOnDiskCacheTest, Basic) { unittest::TempDir Temp("ondisk-unified", /*Unique=*/true); std::unique_ptr UniDB; + const uint64_t SizeLimit = 1024ull * 64; auto reopenDB = [&]() { UniDB.reset(); - const uint64_t SizeLimit = 1024ull * 64; ASSERT_THAT_ERROR(UnifiedOnDiskCache::open(Temp.path(), SizeLimit, "blake3", sizeof(HashType)) .moveInto(UniDB), @@ -129,10 +129,16 @@ TEST(UnifiedOnDiskCacheTest, Basic) { Succeeded()); }; + uint64_t PrevStoreSize = UniDB->getStorageSize(); unsigned Index = 0; while (!UniDB->hasExceededSizeLimit()) { storeBigObject(Index++); } + EXPECT_GT(UniDB->getStorageSize(), PrevStoreSize); + UniDB->setSizeLimit(SizeLimit * 2); + EXPECT_FALSE(UniDB->hasExceededSizeLimit()); + UniDB->setSizeLimit(SizeLimit); + EXPECT_TRUE(UniDB->hasExceededSizeLimit()); reopenDB(); @@ -169,6 +175,8 @@ TEST(UnifiedOnDiskCacheTest, Basic) { checkRootTree(); checkKey(Key1Hash, "root"); + EXPECT_LT(UniDB->getStorageSize(), PrevStoreSize); + // 
'Other' tree and 'Key2' got garbage-collected. { OnDiskGraphDB &DB = UniDB->getGraphDB();