From 46d5502745c97cb658349a0b0f4ddeef5f219057 Mon Sep 17 00:00:00 2001
From: Googler
Date: Thu, 26 Sep 2024 14:09:24 -0700
Subject: [PATCH] Add a standalone disk cache garbage collection utility.

This might be useful to users who desire more control over when garbage
collection runs. It also makes it easier to benchmark, since the facilities
built into Bazel aren't capable of profiling idle tasks.

PiperOrigin-RevId: 679287940
Change-Id: Ib08bff105c2674f9d63ef57181805af30bbc0254
---
 src/BUILD                                     |   1 +
 .../DiskCacheGarbageCollectorIdleTask.java    |  19 +--
 src/tools/diskcache/BUILD                     |  29 ++++
 src/tools/diskcache/Gc.java                   | 159 ++++++++++++++++++
 src/tools/diskcache/README.md                 |   5 +
 5 files changed, 198 insertions(+), 15 deletions(-)
 create mode 100644 src/tools/diskcache/BUILD
 create mode 100644 src/tools/diskcache/Gc.java
 create mode 100644 src/tools/diskcache/README.md

diff --git a/src/BUILD b/src/BUILD
index efb330f793b877..bbd988aeaa6e83 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -377,6 +377,7 @@ filegroup(
         "//src/tools/android:srcs",
         "//src/tools/android/java/com/google/devtools/build/android:srcs",
         "//src/tools/bzlmod:srcs",
+        "//src/tools/diskcache:srcs",
         "//src/tools/execlog:srcs",
         "//src/tools/launcher:srcs",
         "//src/tools/one_version:srcs",
diff --git a/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheGarbageCollectorIdleTask.java b/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheGarbageCollectorIdleTask.java
index 5530986400a377..8f9f68e3805436 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheGarbageCollectorIdleTask.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheGarbageCollectorIdleTask.java
@@ -13,6 +13,8 @@
 // limitations under the License.
 package com.google.devtools.build.lib.remote.disk;
 
+import static com.google.devtools.build.lib.remote.util.Utils.bytesCountToDisplayString;
+
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.flogger.GoogleLogger;
 import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector.CollectionPolicy;
@@ -90,25 +92,12 @@ public void run() {
           "Disk cache garbage collection finished: deleted %d of %d files, reclaimed %s of %s",
           stats.deletedEntries(),
           stats.totalEntries(),
-          formatBytes(stats.deletedBytes()),
-          formatBytes(stats.totalBytes()));
+          bytesCountToDisplayString(stats.deletedBytes()),
+          bytesCountToDisplayString(stats.totalBytes()));
     } catch (IOException e) {
       logger.atInfo().withCause(e).log("Disk cache garbage collection failed");
     } catch (InterruptedException e) {
       logger.atInfo().withCause(e).log("Disk cache garbage collection interrupted");
     }
   }
-
-  private static String formatBytes(long bytes) {
-    if (bytes >= 1024 * 1024 * 1024) {
-      return "%3fGB".formatted((double) bytes / (1024 * 1024 * 1024));
-    }
-    if (bytes >= 1024 * 1024) {
-      return "%3fMB".formatted((double) bytes / (1024 * 1024));
-    }
-    if (bytes >= 1024) {
-      return "%3fKB".formatted((double) bytes / 1024);
-    }
-    return "%dB".formatted(bytes);
-  }
 }
diff --git a/src/tools/diskcache/BUILD b/src/tools/diskcache/BUILD
new file mode 100644
index 00000000000000..dc502158df4e28
--- /dev/null
+++ b/src/tools/diskcache/BUILD
@@ -0,0 +1,29 @@
+load("@rules_java//java:defs.bzl", "java_binary")
+
+package(
+    default_applicable_licenses = ["//:license"],
+    default_visibility = ["//:__pkg__"],
+)
+
+filegroup(
+    name = "srcs",
+    srcs = glob(["**"]),
+    visibility = ["//src:__subpackages__"],
+)
+
+java_binary(
+    name = "gc",
+    srcs = ["Gc.java"],
+    main_class = "diskcache.Gc",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//java/com/google/common/util/concurrent",
+        "//src/main/java/com/google/devtools/build/lib/remote/disk",
+        "//src/main/java/com/google/devtools/build/lib/remote/util",
+        "//src/main/java/com/google/devtools/build/lib/unix",
+        "//src/main/java/com/google/devtools/build/lib/util:os",
+        "//src/main/java/com/google/devtools/build/lib/vfs",
+        "//src/main/java/com/google/devtools/build/lib/windows",
+        "//src/main/java/com/google/devtools/common/options",
+    ],
+)
diff --git a/src/tools/diskcache/Gc.java b/src/tools/diskcache/Gc.java
new file mode 100644
index 00000000000000..f1b5b9dee9b8fb
--- /dev/null
+++ b/src/tools/diskcache/Gc.java
@@ -0,0 +1,159 @@
+// Copyright 2024 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package diskcache;
+
+import static com.google.devtools.build.lib.remote.util.Utils.bytesCountToDisplayString;
+import static java.lang.Math.min;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector;
+import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector.CollectionPolicy;
+import com.google.devtools.build.lib.remote.disk.DiskCacheGarbageCollector.CollectionStats;
+import com.google.devtools.build.lib.unix.UnixFileSystem;
+import com.google.devtools.build.lib.util.OS;
+import com.google.devtools.build.lib.vfs.DigestHashFunction;
+import com.google.devtools.build.lib.vfs.FileSystem;
+import com.google.devtools.build.lib.windows.WindowsFileSystem;
+import com.google.devtools.common.options.Converters.ByteSizeConverter;
+import com.google.devtools.common.options.Option;
+import com.google.devtools.common.options.OptionDocumentationCategory;
+import com.google.devtools.common.options.OptionEffectTag;
+import com.google.devtools.common.options.OptionsBase;
+import com.google.devtools.common.options.OptionsParser;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * Standalone disk cache garbage collection utility.
+ *
+ * <p>Runs a single garbage collection over the directory given by {@code --disk_cache}, using the
+ * limits given by {@code --max_size} and/or {@code --max_age}, then prints a summary and exits.
+ */
+public final class Gc {
+
+  private Gc() {}
+
+  /** Command line options. */
+  public static final class Options extends OptionsBase {
+
+    @Option(
+        name = "disk_cache",
+        defaultValue = "null",
+        documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
+        effectTags = {OptionEffectTag.UNKNOWN},
+        help = "Path to disk cache.")
+    public String diskCache;
+
+    @Option(
+        name = "max_size",
+        defaultValue = "0",
+        converter = ByteSizeConverter.class,
+        documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
+        effectTags = {OptionEffectTag.UNKNOWN},
+        help =
+            "The target size for the disk cache. If set to a positive value, older entries will be"
+                + " deleted as required to reach this size.")
+    public long maxSize;
+
+    @Option(
+        name = "max_age",
+        defaultValue = "0",
+        documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
+        effectTags = {OptionEffectTag.UNKNOWN},
+        help =
+            "The target age for the disk cache. If set to a positive value, entries exceeding this"
+                + " age will be deleted.")
+    public Duration maxAge;
+  }
+
+  /** Thread pool handed to the garbage collector; sized to the CPU count, capped at four. */
+  private static final ExecutorService executorService =
+      Executors.newFixedThreadPool(
+          min(4, Runtime.getRuntime().availableProcessors()),
+          new ThreadFactoryBuilder().setNameFormat("disk-cache-gc-%d").build());
+
+  /**
+   * Parses the command line, runs one garbage collection over the disk cache, and prints a summary
+   * of what was deleted.
+   *
+   * <p>Exits with status 0 on success, and with status 1 if the arguments are invalid or the
+   * collection fails.
+   */
+  public static void main(String[] args) throws Exception {
+    OptionsParser op = OptionsParser.builder().optionsClasses(Options.class).build();
+    op.parseAndExitUponError(args);
+
+    Options options = op.getOptions(Options.class);
+
+    if (options.diskCache == null) {
+      System.err.println("--disk_cache must be specified.");
+      System.exit(1);
+    }
+
+    // A limit only applies when it is strictly positive; derive both limits once so that the
+    // validation below and the collection policy cannot disagree about what counts as "set".
+    Optional<Long> maxSize =
+        options.maxSize > 0 ? Optional.of(options.maxSize) : Optional.empty();
+    Optional<Duration> maxAge =
+        options.maxAge.isZero() || options.maxAge.isNegative()
+            ? Optional.empty()
+            : Optional.of(options.maxAge);
+
+    if (maxSize.isEmpty() && maxAge.isEmpty()) {
+      System.err.println(
+          "At least one of --max_size or --max_age must be set to a positive value.");
+      System.exit(1);
+    }
+
+    var root = getFileSystem().getPath(options.diskCache);
+    if (!root.isDirectory()) {
+      System.err.println("Expected --disk_cache to exist and be a directory.");
+      System.exit(1);
+    }
+
+    var policy = new CollectionPolicy(maxSize, maxAge);
+    var gc = new DiskCacheGarbageCollector(root, executorService, policy);
+
+    try {
+      CollectionStats stats = gc.run();
+
+      System.out.printf(
+          "Deleted %d of %d files, reclaimed %s of %s\n",
+          stats.deletedEntries(),
+          stats.totalEntries(),
+          bytesCountToDisplayString(stats.deletedBytes()),
+          bytesCountToDisplayString(stats.totalBytes()));
+    } catch (Exception e) {
+      // The pool's worker threads are non-daemon, so an exception escaping main() would leave the
+      // JVM running after the stack trace is printed. Report the failure and exit explicitly.
+      System.err.println("Disk cache garbage collection failed.");
+      e.printStackTrace();
+      System.exit(1);
+    }
+
+    System.exit(0);
+  }
+
+  /** Returns a {@link WindowsFileSystem} on Windows and a {@link UnixFileSystem} otherwise. */
+  private static FileSystem getFileSystem() {
+    // Note: the digest function is irrelevant, as the garbage collector scans the entire disk cache
+    // and never computes digests.
+    if (OS.getCurrent() == OS.WINDOWS) {
+      return new WindowsFileSystem(DigestHashFunction.SHA256, false);
+    }
+    return new UnixFileSystem(DigestHashFunction.SHA256, "");
+  }
+}
diff --git a/src/tools/diskcache/README.md b/src/tools/diskcache/README.md
new file mode 100644
index 00000000000000..4f50023b9f0fb2
--- /dev/null
+++ b/src/tools/diskcache/README.md
@@ -0,0 +1,5 @@
+# Standalone disk cache garbage collection utility
+
+This utility may be used to manually run a garbage collection on a disk cache,
+if more control over when garbage collection runs is desired than is afforded
+by the automatic garbage collection built into Bazel.