From 2d8ba2974ce4da8e5e616a41567902533dda4181 Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 11 Mar 2021 16:31:48 -0500 Subject: [PATCH] [query-service] teach query service to read MTs and Ts created by Spark (#10184) * [query-service] teach query service to read MTs and Ts created by Spark Hail-on-Spark uses HadoopFS, which emulates directories by creating size-zero files with the name `gs://bucket/dirname/`. Note: the object name literally ends in a slash. Such files should not be included in `listStatus` (they should always be empty anyway). Unfortunately, my fix in https://github.com/hail-is/hail/pull/9914 was wrong because `GoogleStorageFileStatus` removes the trailing slash. As a result, `fs.getPath` never matched `path`, which always ends in a `/`, so the directory markers were never filtered out. * fix --- hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala index d772178b6d2..0772aca1322 100644 --- a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala @@ -317,8 +317,9 @@ class GoogleStorageFS(serviceAccountKey: String) extends FS { val blobs = storage.list(bucket, BlobListOption.prefix(path), BlobListOption.currentDirectory()) blobs.getValues.iterator.asScala - .map(b => GoogleStorageFileStatus(b)) - .filter(fs => !(fs.isDirectory && fs.getPath == path)) + .map(b => (b, GoogleStorageFileStatus(b))) + .filter { case (b, fs) => !(fs.isDirectory && b.getName == path) } // elide directory markers created by Hadoop + .map { case (b, fs) => fs } .toArray }