Skip to content

Commit

Permalink
[cas/libclang] Add libclang APIs for restricting the size of the loca…
Browse files Browse the repository at this point in the history
…l CAS directory

The new APIs allow the client to:
* Get the current size
* Set a size limit in bytes
* Prune data from the directory

The client decides how to derive the proper size limit.

rdar://121129053
  • Loading branch information
akyrtzi committed Jan 18, 2024
1 parent 9e5f4c7 commit 00b7635
Show file tree
Hide file tree
Showing 16 changed files with 399 additions and 16 deletions.
34 changes: 34 additions & 0 deletions clang/include/clang-c/CAS.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "clang-c/CXString.h"
#include "clang-c/Platform.h"
#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -118,6 +119,39 @@ clang_experimental_cas_Databases_create(CXCASOptions Opts, CXString *Error);
*/
CINDEX_LINKAGE void clang_experimental_cas_Databases_dispose(CXCASDatabases);

/**
* Get the local storage size of the CAS/cache data in bytes.
*
* \param[out] OutError The error object to pass back to client (if any).
* If non-null the object must be disposed using \c clang_Error_dispose.
* The \p OutError pointer itself may be NULL, in which case an error is
* discarded instead of being passed back.
* \returns the local storage size of the CAS/cache data, or -1 if the
* implementation does not support reporting such size, or -2 if an error
* occurred.
*/
CINDEX_LINKAGE int64_t clang_experimental_cas_Databases_get_storage_size(
CXCASDatabases, CXError *OutError);

/**
* Set the size for limiting disk storage growth.
*
* \param size_limit the maximum size limit in bytes. 0 means no limit. Negative
* values are invalid and cause an error object to be returned.
* \returns an error object if there was an error, NULL otherwise.
* If non-null the object must be disposed using \c clang_Error_dispose.
*/
CINDEX_LINKAGE CXError clang_experimental_cas_Databases_set_size_limit(
CXCASDatabases, int64_t size_limit);

/**
* Prune local storage to reduce its size according to the desired size limit.
* Pruning can happen concurrently with other operations.
*
* See \c clang_experimental_cas_Databases_set_size_limit for setting the size
* limit.
*
* \returns an error object if there was an error, NULL otherwise.
* If non-null the object must be disposed using \c clang_Error_dispose.
*/
CINDEX_LINKAGE
CXError clang_experimental_cas_Databases_prune_ondisk_data(CXCASDatabases);

/**
* Loads an object using its printed \p CASID.
*
Expand Down
63 changes: 63 additions & 0 deletions clang/test/CAS/libclang-prune-data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// REQUIRES: ondisk_cas

// Tests that the CAS directory storage can be limited via libclang APIs.
// The test depends on internal details of the CAS directory structure.

// RUN: rm -rf %t && mkdir -p %t

// RUN: %clang -cc1depscan -fdepscan=inline -fdepscan-include-tree -o %t/t.rsp -cc1-args \
// RUN: -cc1 -triple x86_64-apple-macos12 -fcas-path %t/cas -emit-obj %s -o %t/output.o
// RUN: %clang @%t/t.rsp
// RUN: ls %t/cas | wc -l | grep 2
// RUN: ls %t/cas | grep v1.1

// Limit too high, no change.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 100000000
// RUN: ls %t/cas | wc -l | grep 2

// Under the limit, starts a chain.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 10
// RUN: ls %t/cas | wc -l | grep 3
// RUN: ls %t/cas | grep v1.2

// Under the limit, starts a chain and abandons oldest dir.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 10
// RUN: ls %t/cas | wc -l | grep 4
// RUN: ls %t/cas | grep v1.3

// Under the limit, removes abandoned dir, starts a chain and abandons oldest dir.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 10
// RUN: ls %t/cas | wc -l | grep 4
// RUN: ls %t/cas | grep v1.4
// RUN: ls %t/cas | grep -v v1.1

// Same test but using the plugin CAS.

// RUN: rm -rf %t/cas

// RUN: %clang -cc1depscan -fdepscan=inline -fdepscan-include-tree -o %t/t.rsp -cc1-args \
// RUN: -cc1 -triple x86_64-apple-macos12 -fcas-path %t/cas -emit-obj %s -o %t/output.o \
// RUN: -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext
// RUN: %clang @%t/t.rsp
// RUN: ls %t/cas | wc -l | grep 2
// RUN: ls %t/cas | grep v1.1

// Limit too high, no change.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 100000000 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext
// RUN: ls %t/cas | wc -l | grep 2

// Under the limit, starts a chain.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext
// RUN: ls %t/cas | wc -l | grep 3
// RUN: ls %t/cas | grep v1.2

// Under the limit, starts a chain and abandons oldest dir.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext
// RUN: ls %t/cas | wc -l | grep 4
// RUN: ls %t/cas | grep v1.3

// Under the limit, removes abandoned dir, starts a chain and abandons oldest dir.
// RUN: c-index-test core -prune-cas -cas-path %t/cas 10 -fcas-plugin-path %llvmshlibdir/libCASPluginTest%pluginext
// RUN: ls %t/cas | wc -l | grep 4
// RUN: ls %t/cas | grep v1.4
// RUN: ls %t/cas | grep -v v1.1
56 changes: 56 additions & 0 deletions clang/tools/c-index-test/core_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ enum class ActionType {
ScanDepsByModuleName,
MaterializeCachedJob,
ReplayCachedJob,
PruneCAS,
WatchDir,
};

Expand Down Expand Up @@ -88,6 +89,7 @@ Action(cl::desc("Action:"), cl::init(ActionType::None),
"Materialize cached compilation data from upstream CAS"),
clEnumValN(ActionType::ReplayCachedJob, "replay-cached-job",
"Replay a cached compilation from the CAS"),
clEnumValN(ActionType::PruneCAS, "prune-cas", "Prune CAS data"),
clEnumValN(ActionType::WatchDir,
"watch-dir", "Watch directory for file events")),
cl::cat(IndexTestCoreCategory));
Expand Down Expand Up @@ -990,6 +992,43 @@ static int replayCachedJob(ArrayRef<const char *> Args,
return 0;
}

static int pruneCAS(int64_t Limit, CXCASDatabases DBs) {
CXError Err = nullptr;
int64_t Size = clang_experimental_cas_Databases_get_storage_size(DBs, &Err);
if (Size == -2) {
llvm::errs() << "clang_experimental_cas_Databases_get_storage_size: "
<< clang_Error_getDescription(Err) << "\n";
clang_Error_dispose(Err);
return 1;
}
if (Size == -1) {
llvm::errs()
<< "unsupported clang_experimental_cas_Databases_get_storage_size";
return 1;
}
if (Size == 0) {
llvm::errs()
<< "clang_experimental_cas_Databases_get_storage_size returned 0";
return 1;
}

if (CXError Err =
clang_experimental_cas_Databases_set_size_limit(DBs, Limit)) {
llvm::errs() << "clang_experimental_cas_Databases_set_size_limit: "
<< clang_Error_getDescription(Err) << "\n";
clang_Error_dispose(Err);
return 1;
}
if (CXError Err = clang_experimental_cas_Databases_prune_ondisk_data(DBs)) {
llvm::errs() << "clang_experimental_cas_Databases_prune_ondisk_data: "
<< clang_Error_getDescription(Err) << "\n";
clang_Error_dispose(Err);
return 1;
}

return 0;
}

static void printSymbol(const IndexRecordDecl &Rec, raw_ostream &OS) {
printSymbolInfo(Rec.SymInfo, OS);
OS << " | ";
Expand Down Expand Up @@ -1379,6 +1418,23 @@ int indextest_core_main(int argc, const char **argv) {
options::InputFiles[0], DBs);
}

// -prune-cas: the first positional input is the size limit handed to
// pruneCAS.
if (options::Action == ActionType::PruneCAS) {
if (options::InputFiles.empty()) {
errs() << "error: missing size limit\n";
return 1;
}
int64_t Limit;
// getAsInteger returns true on failure; the limit is parsed as base 10.
if (StringRef(options::InputFiles[0]).getAsInteger(10, Limit)) {
errs() << "error: size limit not an integer\n";
return 1;
}
// Pruning needs CAS databases to operate on.
if (!DBs) {
errs() << "error: CAS was not configured\n";
return 1;
}
return pruneCAS(Limit, DBs);
}

if (options::Action == ActionType::WatchDir) {
if (options::InputFiles.empty()) {
errs() << "error: missing directory path\n";
Expand Down
72 changes: 62 additions & 10 deletions clang/tools/libclang/CCAS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,20 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(WrappedReplayResult, CXCASReplayResult)

} // anonymous namespace

/// Hands \p E to the client through \p OutError as a newly created CXError.
/// When the client supplied no out-pointer the error is consumed instead, so
/// it is never left unhandled.
static void passAsCXError(Error &&E, CXError *OutError) {
  if (!OutError) {
    llvm::consumeError(std::move(E));
    return;
  }
  *OutError = cxerror::create(std::move(E));
}

CXCASCachedCompilation WrappedCachedCompilation::fromResultID(
Expected<std::optional<CASID>> ResultID, CASID CacheKey,
const std::shared_ptr<llvm::cas::ObjectStore> &CAS,
const std::shared_ptr<llvm::cas::ActionCache> &AC, CXError *OutError) {

auto failure = [OutError](Error &&E) -> CXCASCachedCompilation {
if (OutError)
*OutError = cxerror::create(std::move(E));
passAsCXError(std::move(E), OutError);
return nullptr;
};

Expand Down Expand Up @@ -139,6 +145,57 @@ void clang_experimental_cas_Databases_dispose(CXCASDatabases CDBs) {
delete unwrap(CDBs);
}

/// Reports the local storage size of the CAS/cache data in bytes.
///
/// \param CDBs the databases whose ObjectStore is queried.
/// \param[out] OutError if non-null, cleared on entry and set to a new error
/// object when the query fails; if null, a failure's error is consumed.
/// \returns the size in bytes, -1 if the ObjectStore does not report a size,
/// or -2 if an error occurred.
int64_t clang_experimental_cas_Databases_get_storage_size(CXCASDatabases CDBs,
                                                          CXError *OutError) {
  // Clear the out-parameter up front, as the other OutError-taking entry
  // points in this file do, so that a client never observes a stale or
  // uninitialized error object on the non-error paths.
  if (OutError)
    *OutError = nullptr;

  // Commonly used ObjectStore implementations (on-disk and plugin) combine a
  // CAS and action-cache into a single directory managing the storage
  // holistically for both, so calling the ObjectStore API is sufficient.
  // FIXME: For completeness we should figure out how to deal with potential
  // implementations that use separate directories for CAS and action-cache.
  std::optional<uint64_t> Size;
  if (Error E = unwrap(CDBs)->CAS->getStorageSize().moveInto(Size)) {
    passAsCXError(std::move(E), OutError);
    return -2;
  }
  if (!Size)
    return -1;
  // The C API reports the size as a signed value; make the narrowing explicit.
  return static_cast<int64_t>(*Size);
}

/// Sets the limit used to restrict growth of the on-disk storage.
///
/// \param CDBs the databases whose ObjectStore receives the limit.
/// \param size_limit the limit in bytes; 0 is forwarded as "no limit" (an
/// empty optional) and negative values are rejected with an error.
/// \returns a new error object on failure, null otherwise.
CXError clang_experimental_cas_Databases_set_size_limit(CXCASDatabases CDBs,
                                                        int64_t size_limit) {
  // Reject invalid input before touching the store.
  if (size_limit < 0)
    return cxerror::create(llvm::createStringError(
        llvm::inconvertibleErrorCode(),
        "invalid size limit passed to "
        "clang_experimental_cas_Databases_set_size_limit"));

  // Commonly used ObjectStore implementations (on-disk and plugin) combine a
  // CAS and action-cache into a single directory managing the storage
  // holistically for both, so calling the ObjectStore API is sufficient.
  // FIXME: For completeness we should figure out how to deal with potential
  // implementations that use separate directories for CAS and action-cache.
  std::optional<uint64_t> Limit;
  if (size_limit != 0)
    Limit = static_cast<uint64_t>(size_limit);

  Error SetErr = unwrap(CDBs)->CAS->setSizeLimit(Limit);
  if (!SetErr)
    return nullptr;
  return cxerror::create(std::move(SetErr));
}

/// Asks the ObjectStore of \p CDBs to prune its storage data.
///
/// \returns a new error object if pruning failed, null otherwise.
CXError
clang_experimental_cas_Databases_prune_ondisk_data(CXCASDatabases CDBs) {
  // Commonly used ObjectStore implementations (on-disk and plugin) combine a
  // CAS and action-cache into a single directory managing the storage
  // holistically for both, so calling the ObjectStore API is sufficient.
  // FIXME: For completeness we should figure out how to deal with potential
  // implementations that use separate directories for CAS and action-cache.
  Error PruneErr = unwrap(CDBs)->CAS->pruneStorageData();
  if (!PruneErr)
    return nullptr;
  return cxerror::create(std::move(PruneErr));
}

CXCASObject clang_experimental_cas_loadObjectByString(CXCASDatabases CDBs,
const char *PrintedID,
CXError *OutError) {
Expand All @@ -149,8 +206,7 @@ CXCASObject clang_experimental_cas_loadObjectByString(CXCASDatabases CDBs,
*OutError = nullptr;

auto failure = [OutError](Error &&E) -> CXCASObject {
if (OutError)
*OutError = cxerror::create(std::move(E));
passAsCXError(std::move(E), OutError);
return nullptr;
};

Expand Down Expand Up @@ -311,8 +367,7 @@ clang_experimental_cas_getCachedCompilation(CXCASDatabases CDBs,
*OutError = nullptr;

auto failure = [OutError](Error &&E) -> CXCASCachedCompilation {
if (OutError)
*OutError = cxerror::create(std::move(E));
passAsCXError(std::move(E), OutError);
return nullptr;
};

Expand Down Expand Up @@ -441,10 +496,7 @@ CXCASReplayResult clang_experimental_cas_replayCompilation(
std::move(Invok), WorkingDirectory, WComp.CacheKey,
WComp.CachedResult, DiagText)
.moveInto(Ret)) {
if (OutError)
*OutError = cxerror::create(std::move(E));
else
llvm::consumeError(std::move(E));
passAsCXError(std::move(E), OutError);
return nullptr;
}

Expand Down
3 changes: 3 additions & 0 deletions clang/tools/libclang/libclang.map
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,9 @@ LLVM_16 {
clang_experimental_cas_CASObject_dispose;
clang_experimental_cas_Databases_create;
clang_experimental_cas_Databases_dispose;
clang_experimental_cas_Databases_get_storage_size;
clang_experimental_cas_Databases_prune_ondisk_data;
clang_experimental_cas_Databases_set_size_limit;
clang_experimental_cas_getCachedCompilation;
clang_experimental_cas_getCachedCompilation_async;
clang_experimental_cas_loadObjectByString;
Expand Down
33 changes: 33 additions & 0 deletions llvm/include/llvm-c/CAS/PluginAPI_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,39 @@ LLCAS_PUBLIC llcas_cas_t llcas_cas_create(llcas_cas_options_t, char **error);
*/
LLCAS_PUBLIC void llcas_cas_dispose(llcas_cas_t);

/**
* Get the local storage size of the CAS/cache data in bytes.
*
* \param error optional pointer to receive an error message if an error
* occurred. If set, the memory it points to needs to be released via
* \c llcas_string_dispose.
* \returns the local storage size of the CAS/cache data, or -1 if the
* implementation does not support reporting such size, or -2 if an error
* occurred.
*/
LLCAS_PUBLIC int64_t llcas_cas_get_ondisk_size(llcas_cas_t, char **error);

/**
* Set the size for limiting disk storage growth.
*
* \param size_limit the maximum size limit in bytes. 0 means no limit. Negative
* values are invalid.
* \param error optional pointer to receive an error message if an error
* occurred. If set, the memory it points to needs to be released via
* \c llcas_string_dispose.
* \returns true if there was an error, false otherwise.
*/
LLCAS_PUBLIC bool
llcas_cas_set_ondisk_size_limit(llcas_cas_t, int64_t size_limit, char **error);

/**
* Prune local storage to reduce its size according to the desired size limit.
* Pruning can happen concurrently with other operations.
*
* \param error optional pointer to receive an error message if an error
* occurred. If set, the memory it points to needs to be released via
* \c llcas_string_dispose.
* \returns true if there was an error, false otherwise.
*/
LLCAS_PUBLIC bool llcas_cas_prune_ondisk_data(llcas_cas_t, char **error);

/**
* \returns the hash schema name that the plugin is using. The string memory it
* points to needs to be released via \c llcas_string_dispose.
Expand Down
22 changes: 22 additions & 0 deletions llvm/include/llvm/CAS/ObjectStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,28 @@ class ObjectStore {
return Data.size();
}

/// Set the size for limiting growth of on-disk storage. This has an effect
/// for when the instance is closed.
///
/// Implementations may not have this implemented; the default implementation
/// ignores \p SizeLimit and returns success.
virtual Error setSizeLimit(std::optional<uint64_t> SizeLimit) {
return Error::success();
}

/// \returns the storage size of the on-disk CAS data.
///
/// Implementations that do not support reporting the size should return
/// \p std::nullopt, which is what the default implementation does.
virtual Expected<std::optional<uint64_t>> getStorageSize() const {
return std::nullopt;
}

/// Prune local storage to reduce its size according to the desired size
/// limit. Pruning can happen concurrently with other operations.
///
/// Implementations may not have this implemented; the default implementation
/// does nothing and returns success.
virtual Error pruneStorageData() { return Error::success(); }

/// Validate the whole node tree.
Error validateTree(ObjectRef Ref);

Expand Down
Loading

0 comments on commit 00b7635

Please sign in to comment.