diff --git a/readme.md b/readme.md index caf6e0f..387753c 100644 --- a/readme.md +++ b/readme.md @@ -17,10 +17,14 @@ _The name of this project refers to the `BareLocalFileSystem` and `NakedLocalFil 2. Then specify that you want to use the Bare Local File System implementation `com.globalmentor.apache.hadoop.fs.BareLocalFileSystem` for the `file` scheme. (`BareLocalFileSystem` internally uses `NakedLocalFileSystem`.) The following example does this for Spark in Java: ```java -SparkSession spark = SparkSession.builder().appName("Foo Bar").master("local").getOrCreate(); -spark.sparkContext().hadoopConfiguration().setClass("fs.file.impl", BareLocalFileSystem.class, FileSystem.class); +SparkSession spark = SparkSession.builder().appName("Foo Bar").master("local"). + config("spark.hadoop.fs.file.impl", BareLocalFileSystem.class.getName()). + config("spark.hadoop.fs.AbstractFileSystem.file.impl", BareStreamingLocalFileSystem.class.getName()). + getOrCreate(); ``` +The configuration should be set before calling `getOrCreate()`, because Spark processes such as local metastores already evaluate the Hadoop file system _before_ any Hadoop configuration `setClass(...)` call is made. The `AbstractFileSystem` property is required to enable streaming checkpoint operations. + _Note that you may still get warnings that "HADOOP_HOME and hadoop.home.dir are unset" and "Did not find winutils.exe". This is because the Winutils kludge permeates the Hadoop code and is hard-coded at a low-level, executed statically upon class loading, even for code completely unrelated to file access. 
See [HADOOP-13223: winutils.exe is a bug nexus and should be killed with an axe.](https://issues.apache.org/jira/browse/HADOOP-13223)_ ## Limitations diff --git a/src/main/java/org/apache/hadoop/fs/local/BareStreamingLocalFileSystem.java b/src/main/java/org/apache/hadoop/fs/local/BareStreamingLocalFileSystem.java new file mode 100644 index 0000000..ecae501 --- /dev/null +++ b/src/main/java/org/apache/hadoop/fs/local/BareStreamingLocalFileSystem.java @@ -0,0 +1,20 @@ +package org.apache.hadoop.fs.local; + +import com.globalmentor.apache.hadoop.fs.BareLocalFileSystem; +import org.apache.hadoop.fs.DelegateToFileSystem; + +import java.io.IOException; +import java.net.URISyntaxException; + +/** + * Wraps a new {@link BareLocalFileSystem} in a {@link DelegateToFileSystem} for the {@code file} scheme; required to handle streaming and checkpointing. + * + * Register this class under the Hadoop property {@code fs.AbstractFileSystem.file.impl}, + * or, when configuring through Spark, under {@code spark.hadoop.fs.AbstractFileSystem.file.impl}. + * + */ +public class BareStreamingLocalFileSystem extends DelegateToFileSystem { + public BareStreamingLocalFileSystem(java.net.URI uri, org.apache.hadoop.conf.Configuration conf) throws IOException, URISyntaxException { + super(uri, new BareLocalFileSystem(), conf, "file", false); + } +} \ No newline at end of file