From 8ef7f0ccca014a190a9f4a44100ffa2a2f1a4c3d Mon Sep 17 00:00:00 2001 From: Daniel King Date: Thu, 11 Mar 2021 13:07:56 -0500 Subject: [PATCH 1/2] [query-service] teach query service to read MTs and Ts created by Spark Hail-on-Spark uses HadoopFS, which emulates directories by creating size-zero files with the name `gs://bucket/dirname/`. Note: the object name literally ends in a slash. Such files should not be included in `listStatus` (they should always be empty anyway). Unfortunately, my fix in https://github.com/hail-is/hail/pull/9914 was wrong because `GoogleStorageFileStatus` removes the trailing slash. As a result, the status's path (`fs.getPath`) never matched `path`, which always ends in a `/`. --- hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala index d772178b6d2..863f8eb851a 100644 --- a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala @@ -317,8 +317,9 @@ class GoogleStorageFS(serviceAccountKey: String) extends FS { val blobs = storage.list(bucket, BlobListOption.prefix(path), BlobListOption.currentDirectory()) blobs.getValues.iterator.asScala + .filter(b => b.getName != path) // elide directory markers created by Hadoop .map(b => GoogleStorageFileStatus(b)) - .filter(fs => !(fs.isDirectory && fs.getPath == path)) + .filter(fs => !fs.isDirectory) .toArray } From 9c5328fa881b8dc5ff7aef7912332e6c388d95dc Mon Sep 17 00:00:00 2001 From: Daniel King Date: Thu, 11 Mar 2021 15:00:17 -0500 Subject: [PATCH 2/2] fix: keep subdirectories in listStatus; elide only the directory marker whose blob name equals `path` --- hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala index 863f8eb851a..0772aca1322 100644 --- 
a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala @@ -317,9 +317,9 @@ class GoogleStorageFS(serviceAccountKey: String) extends FS { val blobs = storage.list(bucket, BlobListOption.prefix(path), BlobListOption.currentDirectory()) blobs.getValues.iterator.asScala - .filter(b => b.getName != path) // elide directory markers created by Hadoop - .map(b => GoogleStorageFileStatus(b)) - .filter(fs => !fs.isDirectory) + .map(b => (b, GoogleStorageFileStatus(b))) + .filter { case (b, fs) => !(fs.isDirectory && b.getName == path) } // elide directory markers created by Hadoop + .map { case (b, fs) => fs } .toArray }