Skip to content

Commit

Permalink
Added doctest and method description in context.py
Browse files Browse the repository at this point in the history
Added a doctest for the `textFile` method and docstrings for the `_initialize_context` and `_ensure_initialized` methods in context.py.

Author: Jyotiska NK <jyotiska123@gmail.com>

Closes apache#187 from jyotiska/pyspark_context and squashes the following commits:

356f945 [Jyotiska NK] Added doctest for textFile method in context.py
5b23686 [Jyotiska NK] Updated context.py with method descriptions
  • Loading branch information
jyotiska authored and mateiz committed May 29, 2014
1 parent 4dbb27b commit 9cff1dd
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion python/pyspark/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,18 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
self._temp_dir = \
self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath()

# Initialize SparkContext in function to allow subclass specific initialization
def _initialize_context(self, jconf):
"""
Create and return the JavaSparkContext for the given configuration.

The construction is kept in its own method, rather than inlined in the
caller, so that subclasses can override it to perform subclass-specific
initialization.

:param jconf: configuration object passed straight through to the
    ``JavaSparkContext`` constructor exposed on ``self._jvm``.
:return: the object returned by ``self._jvm.JavaSparkContext(jconf)``.
"""
return self._jvm.JavaSparkContext(jconf)

@classmethod
def _ensure_initialized(cls, instance=None, gateway=None):
"""
Checks whether a SparkContext is initialized or not.
Throws error if a SparkContext is already running.
"""
with SparkContext._lock:
if not SparkContext._gateway:
SparkContext._gateway = gateway or launch_gateway()
Expand Down Expand Up @@ -270,6 +276,13 @@ def textFile(self, name, minPartitions=None):
Read a text file from HDFS, a local file system (available on all
nodes), or any Hadoop-supported file system URI, and return it as an
RDD of Strings.
>>> path = os.path.join(tempdir, "sample-text.txt")
>>> with open(path, "w") as testFile:
... testFile.write("Hello world!")
>>> textFile = sc.textFile(path)
>>> textFile.collect()
[u'Hello world!']
"""
minPartitions = minPartitions or min(self.defaultParallelism, 2)
return RDD(self._jsc.textFile(name, minPartitions), self,
Expand Down

0 comments on commit 9cff1dd

Please sign in to comment.