Skip to content

Commit

Permalink
Storage NIO: Try harder to make believable directories (#3775)
Browse files Browse the repository at this point in the history
* Try harder to make believable directories

Previous implementation of fake folders just looked for a trailing '/'.
Now we also list the files with this prefix and use this to determine
whether the input is a folder. This will detect 'dir' in addition to
just 'dir/' before.

This version will still say 'yes' to anything that ends in a slash,
even if they don't exist, maintaining the previous behavior.

* Clarify documentation and add test case

* add braces for linter

* apply reviewer suggestion

* fix listing folders that don't end in /

* fix comment

* Perf, tests, comments

Improved performance by moving the pseudodir code to only run on file
not found, leaving the common-case path to run faster.

Added tests for pseudodirs, and one in LocalStorage too.

Improved comments following reviewer suggestions.

* stylistic change

* Optimize

Remove check for path without '/' in places where it's not required.

* cosmetic change for linter
  • Loading branch information
jean-philippe-martin authored and chingor13 committed Oct 10, 2018
1 parent 1a3549d commit 636588c
Show file tree
Hide file tree
Showing 6 changed files with 236 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ public abstract class CloudStorageConfiguration {
public abstract boolean stripPrefixSlash();

/**
* Returns {@code true} if paths with a trailing slash should be treated as fake directories.
* Returns {@code true} if directories and paths with a trailing slash should be treated as fake
* directories.
*
* <p>With this feature, if file "foo/bar.txt" exists then both "foo" and "foo/" will be
* treated as if they were existing directories.
*/
public abstract boolean usePseudoDirectories();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import com.google.cloud.storage.StorageOptions;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.base.Strings;
import com.google.common.collect.AbstractIterator;
import com.google.common.net.UrlEscapers;
import com.google.common.primitives.Ints;
Expand Down Expand Up @@ -339,7 +340,8 @@ private SeekableByteChannel newReadChannel(Path path, Set<? extends OpenOption>
}
}
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
// passing false since we just want to check if it ends with /
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(cloudPath);
}
return CloudStorageReadChannel.create(
Expand All @@ -355,7 +357,7 @@ private SeekableByteChannel newWriteChannel(Path path, Set<? extends OpenOption>
throws IOException {
initStorage();
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(cloudPath);
}
BlobId file = cloudPath.getBlobId();
Expand Down Expand Up @@ -446,7 +448,7 @@ public InputStream newInputStream(Path path, OpenOption... options) throws IOExc
public boolean deleteIfExists(Path path) throws IOException {
initStorage();
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
// if the "folder" is empty then we're fine, otherwise complain
// that we cannot act on folders.
try (DirectoryStream<Path> paths = Files.newDirectoryStream(path)) {
Expand Down Expand Up @@ -574,10 +576,13 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep
"File systems associated with paths don't agree on pseudo-directories.");
}
}
if (fromPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
// We refuse to use paths that end in '/'. If the user puts a folder name
// but without the '/', they'll get whatever error GCS will return normally
// (if any).
if (fromPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(fromPath);
}
if (toPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
if (toPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
throw new CloudStoragePseudoDirectoryException(toPath);
}

Expand Down Expand Up @@ -672,9 +677,6 @@ public void checkAccess(Path path, AccessMode... modes) throws IOException {
while (true) {
try {
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories() ) {
return;
}
boolean nullId;
if (isNullOrEmpty(userProject)) {
nullId = storage.get(
Expand All @@ -689,13 +691,26 @@ public void checkAccess(Path path, AccessMode... modes) throws IOException {
== null;
}
if (nullId) {
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage) ) {
// there is no such file, but we're not signalling error because the
// path is a pseudo-directory.
return;
}
throw new NoSuchFileException(path.toString());
}
break;
} catch (StorageException exs) {
// Will rethrow a StorageException if all retries/reopens are exhausted
retryHandler.handleStorageException(exs);
// we're being aggressive by retrying even on scenarios where we'd normally reopen.
} catch (IllegalArgumentException exs) {
if ( CloudStorageUtil.checkPath(path).seemsLikeADirectoryAndUsePseudoDirectories(storage) ) {
// there is no such file, but we're not signalling error because the
// path is a pseudo-directory.
return;
}
// Other cause for IAE, forward the exception.
throw exs;
}
}
}
Expand All @@ -715,19 +730,34 @@ public <A extends BasicFileAttributes> A readAttributes(
while (true) {
try {
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories() ) {
@SuppressWarnings("unchecked")
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
return result;
}
BlobInfo blobInfo;
if (isNullOrEmpty(userProject)) {
blobInfo = storage.get(cloudPath.getBlobId());
} else {
blobInfo = storage.get(cloudPath.getBlobId(), BlobGetOption.userProject(userProject));
BlobInfo blobInfo = null;
try {
BlobId blobId = cloudPath.getBlobId();
// Null or empty name won't give us a file, so skip. But perhaps it's a pseudodir.
if (!isNullOrEmpty(blobId.getName())) {
if (isNullOrEmpty(userProject)) {
blobInfo = storage.get(blobId);
} else {
blobInfo = storage.get(blobId, BlobGetOption.userProject(userProject));
}
}
} catch (IllegalArgumentException ex) {
// the path may be invalid but look like a folder. In that case, return a folder.
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
@SuppressWarnings("unchecked")
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
return result;
}
// No? Propagate.
throw ex;
}
// null size indicate a file that we haven't closed yet, so GCS treats it as not there yet.
if ( null == blobInfo || blobInfo.getSize() == null ) {
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
@SuppressWarnings("unchecked")
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
return result;
}
throw new NoSuchFileException(
"gs://" + cloudPath.getBlobId().getBucket() + "/" + cloudPath.getBlobId().getName());
}
Expand Down Expand Up @@ -785,7 +815,13 @@ public DirectoryStream<Path> newDirectoryStream(final Path dir, final Filter<? s
// Loop will terminate via an exception if all retries are exhausted
while (true) {
try {
final String prefix = cloudPath.toRealPath().toString();
String prePrefix = cloudPath.normalize().toRealPath().toString();
// we can recognize paths without the final "/" as folders,
// but storage.list doesn't do the right thing with those, we need to append a "/".
if (!prePrefix.isEmpty() && !prePrefix.endsWith("/")) {
prePrefix += "/";
}
final String prefix = prePrefix;
Page<Blob> dirList;
if (isNullOrEmpty(userProject)) {
dirList = storage.list(cloudPath.bucket(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Storage;
import com.google.common.collect.UnmodifiableIterator;

import java.io.File;
Expand Down Expand Up @@ -83,8 +86,50 @@ boolean seemsLikeADirectory() {
return path.seemsLikeADirectory();
}

boolean seemsLikeADirectoryAndUsePseudoDirectories() {
return path.seemsLikeADirectory() && fileSystem.config().usePseudoDirectories();
// True if this path may be a directory (and pseudo-directories are enabled)
// Checks:
// 1) does the path end in / ?
// 2) (optional, if storage is set) is there a file whose name starts with path+/ ?
boolean seemsLikeADirectoryAndUsePseudoDirectories(Storage storage) {
if (!fileSystem.config().usePseudoDirectories()) {
return false;
}
if (path.seemsLikeADirectory()) {
return true;
}
// fancy case: the file name doesn't end in slash, but we've been asked to have pseudo dirs.
// Let's see if there are any files with this prefix.
if (storage == null) {
// we are in a context where we don't want to access the storage, so we conservatively
// say this isn't a directory.
return false;
}
// Using the provided path + "/" as a prefix, can we find one file? If so, the path
// is a directory.
String prefix = path.removeBeginningSeparator().toString();
if (!prefix.endsWith("/")) {
prefix += "/";
}
Page<Blob> list = storage.list(
this.bucket(),
Storage.BlobListOption.prefix(prefix),
// we only look at the first result, so no need for a bigger page.
Storage.BlobListOption.pageSize(1));
for (Blob b : list.getValues()) {
// if this blob starts with our prefix and then a slash, then prefix is indeed a folder!
if (b.getBlobId() == null) {
continue;
}
String name = b.getBlobId().getName();
if (name == null) {
continue;
}
if (("/" + name).startsWith(this.path.toAbsolutePath() + "/")) {
return true;
}
}
// no match, so it's not a directory
return false;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
throws StorageException {
String delimiter = null;
String preprefix = "";
long maxResults = Long.MAX_VALUE;
for (Map.Entry<Option, ?> e : options.entrySet()) {
switch (e.getKey()) {
case PREFIX:
Expand All @@ -138,6 +139,9 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
case FIELDS:
// ignore and return all the fields
break;
case MAX_RESULTS:
maxResults = (Long) e.getValue();
break;
default:
throw new UnsupportedOperationException("Unknown option: " + e.getKey());
}
Expand All @@ -156,6 +160,16 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
values.add(so);
}
values.addAll(folders.values());

// truncate to max allowed length
if (values.size() > maxResults) {
List<StorageObject> newValues = new ArrayList<>();
for (int i=0; i < maxResults; i++) {
newValues.add(values.get(i));
}
values = newValues;
}

// null cursor to indicate there is no more data (empty string would cause us to be called again).
// The type cast seems to be necessary to help Java's typesystem remember that collections are iterable.
return Tuple.of(null, (Iterable<StorageObject>) values);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
Expand All @@ -54,6 +55,7 @@
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -397,6 +399,53 @@ public void testExists_trailingSlash_disablePseudoDirectories() throws Exception
}
}

@Test
public void testFakeDirectories() throws IOException {
try (FileSystem fs = forBucket("military")) {
List<Path> paths = new ArrayList<>();
paths.add(fs.getPath("dir/angel"));
paths.add(fs.getPath("dir/deepera"));
paths.add(fs.getPath("dir/deeperb"));
paths.add(fs.getPath("dir/deeper_"));
paths.add(fs.getPath("dir/deeper.sea/hasfish"));
paths.add(fs.getPath("dir/deeper/fish"));
for (Path path : paths) {
Files.createFile(path);
}

// ends with slash, must be a directory
assertThat(Files.isDirectory(fs.getPath("dir/"))).isTrue();
// files are not directories
assertThat(Files.exists(fs.getPath("dir/angel"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/angel"))).isFalse();
// directories are recognized even without the trailing "/"
assertThat(Files.isDirectory(fs.getPath("dir"))).isTrue();
// also works for absolute paths
assertThat(Files.isDirectory(fs.getPath("/dir"))).isTrue();
// non-existent files are not directories (but they don't make us crash)
assertThat(Files.isDirectory(fs.getPath("di"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("dirs"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("dir/deep"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("dir/deeper/fi"))).isFalse();
assertThat(Files.isDirectory(fs.getPath("/dir/deeper/fi"))).isFalse();
// also works for subdirectories
assertThat(Files.isDirectory(fs.getPath("dir/deeper/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("/dir/deeper/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("/dir/deeper"))).isTrue();
// dot and .. folders are directories
assertThat(Files.isDirectory(fs.getPath("dir/deeper/."))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper/.."))).isTrue();
// dots in the name are fine
assertThat(Files.isDirectory(fs.getPath("dir/deeper.sea/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper.sea"))).isTrue();
assertThat(Files.isDirectory(fs.getPath("dir/deeper.seax"))).isFalse();
// the root folder is a directory
assertThat(Files.isDirectory(fs.getPath("/"))).isTrue();
assertThat(Files.isDirectory(fs.getPath(""))).isTrue();
}
}

@Test
public void testDelete() throws Exception {
Files.write(Paths.get(URI.create("gs://love/fashion")), "(✿◕ ‿◕ )ノ".getBytes(UTF_8));
Expand Down
Loading

0 comments on commit 636588c

Please sign in to comment.