Skip to content

Commit

Permalink
Add a standalone disk cache garbage collection utility.
Browse files Browse the repository at this point in the history
This might be useful to users who desire more control over when garbage collection runs. It also makes it easier to benchmark, since the facilities built into Bazel aren't capable of profiling idle tasks.

PiperOrigin-RevId: 679287940
Change-Id: Ib08bff105c2674f9d63ef57181805af30bbc0254
  • Loading branch information
tjgq authored and copybara-github committed Sep 26, 2024
1 parent 355b408 commit 46d5502
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 15 deletions.
1 change: 1 addition & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ filegroup(
"//src/tools/android:srcs",
"//src/tools/android/java/com/google/devtools/build/android:srcs",
"//src/tools/bzlmod:srcs",
"//src/tools/diskcache:srcs",
"//src/tools/execlog:srcs",
"//src/tools/launcher:srcs",
"//src/tools/one_version:srcs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.remote.disk;

import static com.google.devtools.build.lib.remote.util.Utils.bytesCountToDisplayString;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.flogger.GoogleLogger;
import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector.CollectionPolicy;
Expand Down Expand Up @@ -90,25 +92,12 @@ public void run() {
"Disk cache garbage collection finished: deleted %d of %d files, reclaimed %s of %s",
stats.deletedEntries(),
stats.totalEntries(),
formatBytes(stats.deletedBytes()),
formatBytes(stats.totalBytes()));
bytesCountToDisplayString(stats.deletedBytes()),
bytesCountToDisplayString(stats.totalBytes()));
} catch (IOException e) {
logger.atInfo().withCause(e).log("Disk cache garbage collection failed");
} catch (InterruptedException e) {
logger.atInfo().withCause(e).log("Disk cache garbage collection interrupted");
}
}

/**
 * Formats a byte count for display using binary (1024-based) units.
 *
 * <p>Values of at least 1 KiB are scaled to the largest fitting unit among KB, MB and GB and
 * printed with three fractional digits; smaller values are printed as a whole number of bytes.
 *
 * @param bytes the number of bytes to format
 * @return the formatted size followed by its unit suffix, e.g. {@code 1.500KB} or {@code 512B}
 */
private static String formatBytes(long bytes) {
  // Note: "%.3f" (precision), not "%3f" (minimum width). The latter pads to three characters
  // and still prints the default six fractional digits.
  if (bytes >= 1024 * 1024 * 1024) {
    return "%.3fGB".formatted((double) bytes / (1024 * 1024 * 1024));
  }
  if (bytes >= 1024 * 1024) {
    return "%.3fMB".formatted((double) bytes / (1024 * 1024));
  }
  if (bytes >= 1024) {
    return "%.3fKB".formatted((double) bytes / 1024);
  }
  return "%dB".formatted(bytes);
}
}
29 changes: 29 additions & 0 deletions src/tools/diskcache/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Build definitions for the standalone disk cache garbage collection tool.
load("@rules_java//java:defs.bzl", "java_binary")

package(
default_applicable_licenses = ["//:license"],
default_visibility = ["//:__pkg__"],
)

# Every file in this package, exposed to packages under //src.
filegroup(
name = "srcs",
srcs = glob(["**"]),
visibility = ["//src:__subpackages__"],
)

# Command-line entry point built from Gc.java; publicly visible.
java_binary(
name = "gc",
srcs = ["Gc.java"],
main_class = "diskcache.Gc",
visibility = ["//visibility:public"],
deps = [
# NOTE(review): unlike the //src/... deps below, this label is not rooted under //src.
# Gc.java imports com.google.common.util.concurrent.ThreadFactoryBuilder (Guava);
# confirm this target actually exists in this repository.
"//java/com/google/common/util/concurrent",
"//src/main/java/com/google/devtools/build/lib/remote/disk",
"//src/main/java/com/google/devtools/build/lib/remote/util",
"//src/main/java/com/google/devtools/build/lib/unix",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/windows",
"//src/main/java/com/google/devtools/common/options",
],
)
132 changes: 132 additions & 0 deletions src/tools/diskcache/Gc.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package diskcache;

import static com.google.devtools.build.lib.remote.util.Utils.bytesCountToDisplayString;
import static java.lang.Math.min;

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector;
import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector.CollectionPolicy;
import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector.CollectionStats;
import com.google.devtools.build.lib.unix.UnixFileSystem;
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.FileSystem;
import com.google.devtools.build.lib.windows.WindowsFileSystem;
import com.google.devtools.common.options.Converters.ByteSizeConverter;
import com.google.devtools.common.options.Option;
import com.google.devtools.common.options.OptionDocumentationCategory;
import com.google.devtools.common.options.OptionEffectTag;
import com.google.devtools.common.options.OptionsBase;
import com.google.devtools.common.options.OptionsParser;
import java.time.Duration;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Standalone disk cache garbage collection utility.
 *
 * <p>Parses {@code --disk_cache}, {@code --max_size} and {@code --max_age} from the command line,
 * runs one {@link DiskCacheGarbageCollector} pass over the given directory, and prints a summary
 * of the deleted entries to standard output. Exits with status 1 on invalid arguments.
 */
public final class Gc {

  private Gc() {}

  /** Command line options. */
  public static final class Options extends OptionsBase {

    /** Path of the disk cache directory; {@code null} when the flag was not given. */
    @Option(
        name = "disk_cache",
        defaultValue = "null",
        documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
        effectTags = {OptionEffectTag.UNKNOWN},
        help = "Path to disk cache.")
    public String diskCache;

    /** Target cache size in bytes; a non-positive value means "no size limit". */
    @Option(
        name = "max_size",
        defaultValue = "0",
        converter = ByteSizeConverter.class,
        documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
        effectTags = {OptionEffectTag.UNKNOWN},
        help =
            "The target size for the disk cache. If set to a positive value, older entries will be"
                + " deleted as required to reach this size.")
    public long maxSize;

    /** Target maximum entry age; a non-positive value means "no age limit". */
    @Option(
        name = "max_age",
        defaultValue = "0",
        documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
        effectTags = {OptionEffectTag.UNKNOWN},
        help =
            "The target age for the disk cache. If set to a positive value, entries exceeding this"
                + " age will be deleted.")
    public Duration maxAge;
  }

  /** Pool handed to the garbage collector; capped at four threads. */
  private static final ExecutorService executorService =
      Executors.newFixedThreadPool(
          min(4, Runtime.getRuntime().availableProcessors()),
          new ThreadFactoryBuilder().setNameFormat("disk-cache-gc-%d").build());

  /**
   * Entry point: validates the options, runs the garbage collection and prints the statistics.
   *
   * @param args command line arguments, parsed into {@link Options}
   * @throws Exception if the garbage collection itself fails
   */
  public static void main(String[] args) throws Exception {
    OptionsParser op = OptionsParser.builder().optionsClasses(Options.class).build();
    op.parseAndExitUponError(args);

    Options options = op.getOptions(Options.class);

    if (options.diskCache == null) {
      System.err.println("--disk_cache must be specified.");
      System.exit(1);
    }

    // Decide once what counts as "set", so that the validation below and the policy construction
    // cannot disagree: only strictly positive values are limits.
    boolean hasMaxSize = options.maxSize > 0;
    boolean hasMaxAge = !options.maxAge.isZero() && !options.maxAge.isNegative();

    if (!hasMaxSize && !hasMaxAge) {
      System.err.println(
          "At least one of --max_size or --max_age must be set to a positive value.");
      System.exit(1);
    }

    var root = getFileSystem().getPath(options.diskCache);
    if (!root.isDirectory()) {
      System.err.println("Expected --disk_cache to exist and be a directory.");
      System.exit(1);
    }

    var policy =
        new CollectionPolicy(
            hasMaxSize ? Optional.of(options.maxSize) : Optional.empty(),
            hasMaxAge ? Optional.of(options.maxAge) : Optional.empty());

    var gc = new DiskCacheGarbageCollector(root, executorService, policy);

    try {
      CollectionStats stats = gc.run();

      System.out.printf(
          "Deleted %d of %d files, reclaimed %s of %s\n",
          stats.deletedEntries(),
          stats.totalEntries(),
          bytesCountToDisplayString(stats.deletedBytes()),
          bytesCountToDisplayString(stats.totalBytes()));
    } finally {
      // The pool is never shut down elsewhere. If run() throws, System.exit below is not reached,
      // and any worker threads already started (non-daemon unless the factory says otherwise)
      // would keep the JVM alive after main has failed.
      executorService.shutdownNow();
    }

    System.exit(0);
  }

  /** Returns a file system implementation matching the current operating system. */
  private static FileSystem getFileSystem() {
    // Note: the digest function is irrelevant, as the garbage collector scans the entire disk cache
    // and never computes digests.
    if (OS.getCurrent() == OS.WINDOWS) {
      return new WindowsFileSystem(DigestHashFunction.SHA256, false);
    }
    return new UnixFileSystem(DigestHashFunction.SHA256, "");
  }
}
5 changes: 5 additions & 0 deletions src/tools/diskcache/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Standalone disk cache garbage collection utility

This utility may be used to manually run a garbage collection on a disk cache
when more control over when garbage collection runs is desired than is afforded
by the automatic garbage collection built into Bazel.

0 comments on commit 46d5502

Please sign in to comment.