From 1fee7f70cffcd9a670fd7e2a39cfde9d16390226 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 25 Nov 2024 09:18:41 -0800 Subject: [PATCH] Implement main garbage collection logic for stale install bases. Progress on #2109. PiperOrigin-RevId: 700006410 Change-Id: Ifd0cfdca6d4124addfecb99b0dec5f488e3ffedd --- .../google/devtools/build/lib/server/BUILD | 10 ++ .../server/InstallBaseGarbageCollector.java | 141 ++++++++++++++++ .../google/devtools/build/lib/server/BUILD | 3 + .../InstallBaseGarbageCollectorTest.java | 150 ++++++++++++++++++ 4 files changed, 304 insertions(+) create mode 100644 src/main/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollector.java create mode 100644 src/test/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollectorTest.java diff --git a/src/main/java/com/google/devtools/build/lib/server/BUILD b/src/main/java/com/google/devtools/build/lib/server/BUILD index a3d157c886ca55..b49dde0b686cf2 100644 --- a/src/main/java/com/google/devtools/build/lib/server/BUILD +++ b/src/main/java/com/google/devtools/build/lib/server/BUILD @@ -101,3 +101,13 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:abrupt_exit_exception", ], ) + +java_library( + name = "install_base_garbage_collector", + srcs = ["InstallBaseGarbageCollector.java"], + deps = [ + "//src/main/java/com/google/devtools/build/lib/util:file_system_lock", + "//src/main/java/com/google/devtools/build/lib/vfs", + "//third_party:guava", + ], +) diff --git a/src/main/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollector.java b/src/main/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollector.java new file mode 100644 index 00000000000000..53593586b5cfb8 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollector.java @@ -0,0 +1,141 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.server; + +import com.google.common.annotations.VisibleForTesting; +import com.google.devtools.build.lib.util.FileSystemLock; +import com.google.devtools.build.lib.util.FileSystemLock.LockAlreadyHeldException; +import com.google.devtools.build.lib.vfs.Dirent; +import com.google.devtools.build.lib.vfs.FileStatus; +import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.Symlinks; +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.util.UUID; + +/** + * A garbage collector for stale install bases. + * + *

Garbage collection operates on other install bases found in the parent directory of our own + * install base. The mtime of each install base directory, which is updated by the client on every + * invocation, determines whether it's eligible for garbage collection. In addition, both clients + * and servers place a lock on their respective install base to prevent it from being collected + * while in use. + */ +public final class InstallBaseGarbageCollector { + @VisibleForTesting static final String LOCK_SUFFIX = ".lock"; + @VisibleForTesting static final String DELETED_SUFFIX = ".deleted"; + + private final Path root; + private final Path ownInstallBase; + private final Duration maxAge; + + /** + * Creates a new garbage collector. + * + * @param root the install user root, i.e., the parent directory of install bases + * @param ownInstallBase the current server's install base + * @param maxAge how long an install base must remain unused before it's eligible for collection + */ + public InstallBaseGarbageCollector(Path root, Path ownInstallBase, Duration maxAge) { + this.root = root; + this.ownInstallBase = ownInstallBase; + this.maxAge = maxAge; + } + + public void run() throws IOException, InterruptedException { + for (Dirent dirent : root.readdir(Symlinks.FOLLOW)) { + if (Thread.interrupted()) { + throw new InterruptedException(); + } + if (!dirent.getType().equals(Dirent.Type.DIRECTORY)) { + // Ignore non-directories. + continue; + } + Path child = root.getChild(dirent.getName()); + if (isInstallBase(child)) { + if (child.equals(ownInstallBase)) { + // Don't attempt to collect our own install base. + continue; + } + collectWhenStale(child); + } else if (isIncompleteDeletion(child)) { + // This install base is either being deleted, or an earlier attempt to delete it was + // interrupted. Assume the latter and try again, otherwise it will never be deleted. + // Concurrent attempts are fine because deleteTree treats not found as successful deletion. + child.deleteTree(); + } + } + } + + private void collectWhenStale(Path installBase) throws IOException { + Path pathToDelete = null; + Path lockPath = getLockPath(installBase); + try (FileSystemLock lock = FileSystemLock.getExclusive(lockPath)) { + FileStatus status = installBase.statIfFound(); + if (status == null) { + // The install base is already gone. Back off. + // This cannot be a garbage collection by another Bazel server, as it would have taken an + // exclusive lock, but maybe the user or something else in the system did a cleanup. + return; + } + Duration age = + Duration.between(Instant.ofEpochMilli(status.getLastModifiedTime()), Instant.now()); + if (age.compareTo(maxAge) < 0) { + // The install base was recently used. Back off. + // If the install base belongs to an older binary that doesn't lock it before use, it's + // possible to hit a tiny race condition between the older binary checking whether the + // install base exists and updating its mtime. Unfortunately, this is the best we can do. + return; + } + // Rename the install base before deleting it. + // This avoids leaving behind a corrupted install base if the deletion is interrupted, which + // would be treated as a fatal error by a subsequent invocation and require a manual cleanup. + // The new name must be unique, because the same install base can be recreated and deleted for + // a second time after a first deletion attempt is interrupted. + pathToDelete = getDeletedPath(installBase); + installBase.renameTo(pathToDelete); + // Now that the install base has been renamed, we can delete the lock file. + // This is done early to avoid leaving the lock file behind if the deletion is interrupted. + // It's still possible to get interrupted in between the rename and delete, but we accept it. + lockPath.delete(); + } catch (LockAlreadyHeldException e) { + // Looks like this install base is currently in use. Back off. + return; + } + // We can now perform the actual deletion. + pathToDelete.deleteTree(); + } + + private static Path getLockPath(Path installBase) { + Path parent = installBase.getParentDirectory(); + return parent.getChild(installBase.getBaseName() + LOCK_SUFFIX); + } + + private static Path getDeletedPath(Path installBase) { + Path parent = installBase.getParentDirectory(); + return parent.getChild(UUID.randomUUID() + DELETED_SUFFIX); + } + + private static boolean isInstallBase(Path path) { + String name = path.getBaseName(); + return name.length() == 32 + && name.chars().allMatch(c -> (c >= 'a' && c <= 'f') || (c >= '0' && c <= '9')); + } + + private static boolean isIncompleteDeletion(Path path) { + return path.getBaseName().endsWith(DELETED_SUFFIX); + } +} diff --git a/src/test/java/com/google/devtools/build/lib/server/BUILD b/src/test/java/com/google/devtools/build/lib/server/BUILD index 0e412e1343193f..700715d4b9c2dd 100644 --- a/src/test/java/com/google/devtools/build/lib/server/BUILD +++ b/src/test/java/com/google/devtools/build/lib/server/BUILD @@ -22,6 +22,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/clock", "//src/main/java/com/google/devtools/build/lib/server", "//src/main/java/com/google/devtools/build/lib/server:idle_task", + "//src/main/java/com/google/devtools/build/lib/server:install_base_garbage_collector", "//src/main/java/com/google/devtools/build/lib/server:pid_file_watcher", "//src/main/java/com/google/devtools/build/lib/server:shutdown_hooks", "//src/main/java/com/google/devtools/build/lib/unix:procmeminfo_parser", @@ -30,6 +31,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util/io:io-proto", "//src/main/java/com/google/devtools/build/lib/util/io:out-err", "//src/main/java/com/google/devtools/build/lib/vfs", + "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs", "//src/main/protobuf:command_server_java_grpc", "//src/main/protobuf:command_server_java_proto", @@ -38,6 +40,7 @@ java_library( "//src/test/java/com/google/devtools/build/lib/testutil", "//src/test/java/com/google/devtools/build/lib/testutil:TestThread", "//src/test/java/com/google/devtools/build/lib/testutil:TestUtils", + "//src/test/java/com/google/devtools/build/lib/testutil:external_file_system_lock", "//third_party:guava", "//third_party:junit4", "//third_party:mockito", diff --git a/src/test/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollectorTest.java b/src/test/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollectorTest.java new file mode 100644 index 00000000000000..c01722521d86c8 --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/server/InstallBaseGarbageCollectorTest.java @@ -0,0 +1,150 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.server; + +import static com.google.common.truth.Truth.assertThat; +import static com.google.devtools.build.lib.server.InstallBaseGarbageCollector.DELETED_SUFFIX; +import static com.google.devtools.build.lib.server.InstallBaseGarbageCollector.LOCK_SUFFIX; + +import com.google.devtools.build.lib.testutil.ExternalFileSystemLock; +import com.google.devtools.build.lib.testutil.TestUtils; +import com.google.devtools.build.lib.vfs.FileSystemUtils; +import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.PathFragment; +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link InstallBaseGarbageCollector}. */ +@RunWith(JUnit4.class) +public final class InstallBaseGarbageCollectorTest { + private static final String OWN_MD5 = "012345678901234567890123456789012"; + private static final String OTHER_MD5 = "abcdefabcdefabcdefabcdefabcdefab"; + + private Path rootDir; + private Path ownInstallBase; + + @Before + public void setUp() throws Exception { + rootDir = TestUtils.createUniqueTmpDir(null); + ownInstallBase = createSubdirectory(OWN_MD5); + } + + @Test + public void onlyOwnInstallBase_notCollected() throws Exception { + run(Duration.ZERO); + + assertDirectoryContents(OWN_MD5); + } + + @Test + public void otherInstallBase_notStaleAndUnlocked_notCollected() throws Exception { + Path otherInstallBase = createSubdirectory(OTHER_MD5); + setAge(otherInstallBase, Duration.ofDays(1)); + + run(Duration.ofDays(2)); + + assertDirectoryContents(OWN_MD5, OTHER_MD5, OTHER_MD5 + LOCK_SUFFIX); + } + + @Test + public void otherInstallBase_notStaleAndLocked_notCollected() throws Exception { + Path otherInstallBase = createSubdirectory(OTHER_MD5); + setAge(otherInstallBase, Duration.ofDays(1)); + + try (var lock = ExternalFileSystemLock.getShared(rootDir.getChild(OTHER_MD5 + LOCK_SUFFIX))) { + run(Duration.ofDays(2)); + } + + assertDirectoryContents(OWN_MD5, OTHER_MD5, OTHER_MD5 + LOCK_SUFFIX); + } + + @Test + public void otherInstallBase_staleAndUnlocked_collected() throws Exception { + Path otherInstallBase = createSubdirectory(OTHER_MD5); + setAge(otherInstallBase, Duration.ofDays(3)); + + run(Duration.ofDays(2)); + + assertDirectoryContents(OWN_MD5); + } + + @Test + public void otherInstallBase_staleAndLocked_notCollected() throws Exception { + Path otherInstallBase = createSubdirectory(OTHER_MD5); + setAge(otherInstallBase, Duration.ofDays(3)); + + try (var lock = ExternalFileSystemLock.getShared(rootDir.getChild(OTHER_MD5 + LOCK_SUFFIX))) { + run(Duration.ofDays(2)); + } + + assertDirectoryContents(OWN_MD5, OTHER_MD5, OTHER_MD5 + LOCK_SUFFIX); + } + + @Test + public void incompleteDeletion_collected() throws Exception { + Path incompleteDeletion = createSubdirectory(OTHER_MD5 + DELETED_SUFFIX); + setAge(incompleteDeletion, Duration.ofDays(2)); + + run(Duration.ofDays(1)); + + assertDirectoryContents(OWN_MD5); + } + + @Test + public void otherFilesAndDirectories_notCollected() throws Exception { + Path otherFile = rootDir.getChild("file"); + FileSystemUtils.writeContentAsLatin1(otherFile, "content"); + setAge(otherFile, Duration.ofDays(2)); + Path otherDir = rootDir.getChild("dir"); + otherDir.createDirectoryAndParents(); + setAge(otherDir, Duration.ofDays(2)); + Path otherSymlink = rootDir.getChild("symlink"); + otherSymlink.createSymbolicLink(PathFragment.create(OWN_MD5)); + + run(Duration.ofDays(1)); + + assertDirectoryContents(OWN_MD5, "file", "dir", "symlink"); + } + + private Path createSubdirectory(String name) throws IOException { + Path dir = rootDir.getChild(name); + Path file = dir.getChild("file"); + Path subdir = dir.getChild("subdir"); + Path subfile = subdir.getChild("file"); + dir.createDirectoryAndParents(); + subdir.createDirectoryAndParents(); + FileSystemUtils.writeContentAsLatin1(file, "content"); + FileSystemUtils.writeContentAsLatin1(subfile, "content"); + + return dir; + } + + private void setAge(Path path, Duration age) throws IOException { + path.setLastModifiedTime(Instant.now().minus(age).toEpochMilli()); + } + + private void run(Duration maxAge) throws Exception { + new InstallBaseGarbageCollector(rootDir, ownInstallBase, maxAge).run(); + } + + private void assertDirectoryContents(Object... expected) throws Exception { + assertThat(rootDir.getDirectoryEntries().stream().map(Path::getBaseName)) + .containsExactly(expected); + } +}