Skip to content

Commit

Permalink
UnixFileSystem: read cached hashes from extended attributes
Browse files Browse the repository at this point in the history
There are certain workloads where Bazel's running time gets dominated by
checksum computation. Examples include:

- People adding local_repository()s to their project that point to
  networked file shares.
- The use of repositories that contain very large input files.

When using remote execution, we need to compute digests to be able to
place such files in input roots. In many cases, a centralized CAS will
already contain these files. It would be nice if Bazel could efficiently
check for existence of such objects without needing to scan the file
locally.

This change extends UnixFileSystem to call getxattr() on a configurable
extended attribute before falling back to reading the file's contents.
The attribute name is set through the --unix_digest_hash_attribute_name
startup flag; when it is empty (the default), the lookup is skipped.

Storing this information in extended attributes seems to be a fairly
common approach. Apparently it is also used within Google itself:

https://groups.google.com/g/bazel-discuss/c/6VmjSOLySnY/m/v2dpwt8jBgAJ

This change does not add any code to let Bazel write these attributes to
disk. The main goal for now is to speed up access to read-only corpora
whose maintainers have already spent the effort of adding these attributes.
  • Loading branch information
EdSchouten committed Jun 30, 2020
1 parent 27a5c74 commit baf2383
Show file tree
Hide file tree
Showing 16 changed files with 43 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ public ModuleFileSystem getFileSystem(
throws DefaultHashFunctionNotSetException {
BlazeServerStartupOptions options = startupOptions.getOptions(BlazeServerStartupOptions.class);
boolean enableSymLinks = options != null && options.enableWindowsSymlinks;
String unixDigestHashAttributeName = options != null ? options.unixDigestHashAttributeName : "";
if ("0".equals(System.getProperty("io.bazel.EnableJni"))) {
// Ignore UnixFileSystem, to be used for bootstrapping.
return ModuleFileSystem.create(
Expand All @@ -101,6 +102,6 @@ public ModuleFileSystem getFileSystem(
return ModuleFileSystem.create(
OS.getCurrent() == OS.WINDOWS
? new WindowsFileSystem(enableSymLinks)
: new UnixFileSystem());
: new UnixFileSystem(unixDigestHashAttributeName));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ private static FileSystem defaultFileSystemImplementation(
// The JNI-based UnixFileSystem is faster, but on Windows it is not available.
return OS.getCurrent() == OS.WINDOWS
? new WindowsFileSystem(startupOptions.enableWindowsSymlinks)
: new UnixFileSystem();
: new UnixFileSystem(startupOptions.unixDigestHashAttributeName);
}

private static SubprocessFactory subprocessFactoryImplementation() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -481,4 +481,15 @@ public String getTypeDescription() {
+ "Requires Windows developer mode to be enabled and Windows 10 version 1703 or "
+ "greater.")
public boolean enableWindowsSymlinks;

/**
 * Name of the extended attribute that may carry a precomputed digest for a file. Defaults to the
 * empty string. The value is passed to the {@code UnixFileSystem} constructor, whose
 * {@code getDigest} only consults the attribute when the name is non-empty.
 *
 * <p>NOTE(review): {@code getDigest} returns the attribute value unmodified, so the attribute
 * presumably has to hold the raw digest bytes for the active --digest_function (not a hex
 * string) — confirm the expected encoding and whether a length check is wanted.
 */
@Option(
name = "unix_digest_hash_attribute_name",
defaultValue = "",
documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
effectTags = {OptionEffectTag.CHANGES_INPUTS, OptionEffectTag.LOSES_INCREMENTAL_STATE},
help =
"The name of an extended attribute that can be placed on files to store a precomputed "
+ "copy of the file's hash, corresponding with --digest_function. This option "
+ "can be used to reduce disk I/O and CPU load caused by hash computation.")
public String unixDigestHashAttributeName;
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,15 @@
@ThreadSafe
public class UnixFileSystem extends AbstractFileSystemWithCustomStat {

public UnixFileSystem() throws DefaultHashFunctionNotSetException {}
private final String hashAttributeName;

public UnixFileSystem(DigestHashFunction hashFunction) {
/**
 * Creates a file system that may read file digests from the given extended attribute.
 *
 * @param hashAttributeName name of the extended attribute consulted by {@code getDigest} before
 *     it falls back to its regular digest path; an empty string disables the lookup. Must not be
 *     null, because {@code getDigest} calls {@code isEmpty()} on it.
 * @throws DefaultHashFunctionNotSetException presumably raised by the implicit superclass
 *     constructor when no default hash function has been configured — TODO confirm
 */
public UnixFileSystem(String hashAttributeName) throws DefaultHashFunctionNotSetException {
this.hashAttributeName = hashAttributeName;
}

/**
 * Creates a file system with an explicit hash function that may read file digests from the given
 * extended attribute.
 *
 * @param hashFunction hash function forwarded to the superclass constructor
 * @param hashAttributeName name of the extended attribute consulted by {@code getDigest} before
 *     it falls back to its regular digest path; an empty string disables the lookup. Must not be
 *     null, because {@code getDigest} calls {@code isEmpty()} on it.
 */
public UnixFileSystem(DigestHashFunction hashFunction, String hashAttributeName) {
super(hashFunction);
this.hashAttributeName = hashAttributeName;
}

/**
Expand Down Expand Up @@ -409,6 +414,14 @@ public byte[] getxattr(Path path, String name, boolean followSymlinks) throws IO

@Override
protected byte[] getDigest(Path path) throws IOException {
// First attempt to obtain the digest from an extended attribute attached to the file. This
// prevents the checksum from being recomputed unnecessarily.
if (!hashAttributeName.isEmpty()) {
byte[] attr = getxattr(path, hashAttributeName, true);
if (attr != null)
return attr;
}

String name = path.toString();
long startTime = Profiler.nanoTimeMaybe();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ DecompressorDescriptor.Builder createDescriptorBuilder() throws IOException {
FileSystem testFS =
OS.getCurrent() == OS.WINDOWS
? new JavaIoFileSystem(DigestHashFunction.getDefaultUnchecked())
: new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
: new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");

// do not rely on TestConstants.JAVATESTS_ROOT end with slash, but ensure separators
// are not duplicated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class LastBuildEventTest {

@Test
public void testForwardsReferencedLocalFilesCall() {
FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
LocalFile localFile = new LocalFile(fs.getPath("/some/file"), LocalFileType.FAILED_TEST_OUTPUT);
LastBuildEvent event = new LastBuildEvent(new BuildEvent() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ protected boolean realFileSystem() {

@Override
protected FileSystem createFileSystem() {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked()) {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "") {
boolean threwException = false;

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ protected void setListener(FileListener listener) {
@Override
protected FileSystem createFileSystem() {
setListener(DUMMY_LISTENER);
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked()) {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "") {
@Override
protected void chmod(Path path, int chmod) throws IOException {
listener.get().handle(PathOp.CHMOD, path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ protected boolean realFileSystem() {

@Override
protected FileSystem createFileSystem() {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked()) {
return new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "") {

private void recordAccess(PathOp op, Path path) {
if (receiver != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@ public void hasExecutionStatistics_whenOptionIsEnabled() throws Exception {
// TODO(b/62588075) Currently no process-wrapper or execution statistics support in Windows.
assumeTrue(OS.getCurrent() != OS.WINDOWS);

FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
FileSystem fs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");

LocalExecutionOptions options = Options.getDefaults(LocalExecutionOptions.class);
options.collectLocalExecutionStatistics = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public final class CommandUsingLinuxSandboxTest {

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
runfilesDir = testFS.getPath(BlazeTestUtils.runfilesDir());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public final class CommandUsingProcessWrapperTest {

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
}

private ProcessWrapper getProcessWrapper() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class NativePosixFilesTest {

@Before
public final void createFileSystem() throws Exception {
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
testFS = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
workingDir = testFS.getPath(new File(TestUtils.tmpDir()).getCanonicalPath());
testFile = workingDir.getRelative("test");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class UnixFileSystemTest extends SymlinkAwareFileSystemTest {

@Override
protected FileSystem getFreshFileSystem(DigestHashFunction digestHashFunction) {
return new UnixFileSystem(digestHashFunction);
return new UnixFileSystem(digestHashFunction, "");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ public class UnixPathEqualityTest {

@Before
public final void initializeFileSystem() throws Exception {
unixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
otherUnixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked());
unixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
otherUnixFs = new UnixFileSystem(DigestHashFunction.getDefaultUnchecked(), "");
assertThat(unixFs != otherUnixFs).isTrue();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ public static FileSystem getNativeFileSystem() {
try {
return Class.forName(TestConstants.TEST_REAL_UNIX_FILE_SYSTEM)
.asSubclass(FileSystem.class)
.getDeclaredConstructor(DigestHashFunction.class)
.newInstance(DigestHashFunction.getDefaultUnchecked());
.getDeclaredConstructor(DigestHashFunction.class, String.class)
.newInstance(DigestHashFunction.getDefaultUnchecked(), "");
} catch (Exception e) {
throw new IllegalStateException(e);
}
Expand Down

0 comments on commit baf2383

Please sign in to comment.