Skip to content

Commit

Permalink
edit python sparkstreaming example
Browse files Browse the repository at this point in the history
  • Loading branch information
Ken Takagiwa authored and giwa committed Aug 18, 2014
1 parent 0b8b7d0 commit d1ee6ca
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
8 changes: 7 additions & 1 deletion examples/src/main/python/streaming/network_wordcount.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
import sys
from operator import add

from pyspark.conf import SparkConf
from pyspark.streaming.context import StreamingContext
from pyspark.streaming.duration import *

# Script entry point of the network word-count streaming example; the body
# below runs only when the file is executed directly.
# NOTE(review): this text is a flattened diff view -- leading indentation was
# lost and removed/added lines appear together without +/- markers.
if __name__ == "__main__":
# Require exactly two command-line arguments (<hostname> and <port>);
# otherwise print the usage line to stderr and exit with status -1.
# The `print >> sys.stderr, ...` form is Python 2 print-statement syntax.
if len(sys.argv) != 3:
print >> sys.stderr, "Usage: wordcount <hostname> <port>"
exit(-1)
# NOTE(review): presumably the single line deleted by this commit (the page
# header reports 7 additions & 1 deletion, and `ssc` is assigned again a few
# lines below) -- confirm against the raw diff.
ssc = StreamingContext(appName="PythonStreamingNetworkWordCount", duration=Seconds(1))
# Build the configuration explicitly: set the application name and set
# spark.default.parallelism to 1.
conf = SparkConf()
conf.setAppName("PythonStreamingNetworkWordCount")
conf.set("spark.default.parallelism", 1)
# Create the streaming context from that configuration, passing Seconds(1)
# as the duration.
ssc = StreamingContext(conf=conf, duration=Seconds(1))

# Open a text stream using sys.argv[1] as host and int(sys.argv[2]) as port.
lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))
# Split every incoming item on single spaces and flatten the pieces.
fm_lines = lines.flatMap(lambda x: x.split(" "))
# Keep only the pieces that contain the substring "Spark".
filtered_lines = fm_lines.filter(lambda line: "Spark" in line)
# Pair every piece with 1. Note this is derived from fm_lines, not from
# filtered_lines, so the filter above does not affect the pairs.
mapped_lines = fm_lines.map(lambda x: (x, 1))
# Combine the (piece, 1) pairs with operator.add.
# NOTE(review): `add` applied to two tuples concatenates them rather than
# summing a count per word -- confirm this is the intended demonstration.
reduced_lines = mapped_lines.reduce(add)

# Print each intermediate stream via pyprint().
# NOTE(review): presumably prints once per batch -- verify pyprint semantics.
fm_lines.pyprint()
filtered_lines.pyprint()
mapped_lines.pyprint()
reduced_lines.pyprint()
# Start the streaming context, then wait until it terminates.
ssc.start()
ssc.awaitTermination()
1 change: 1 addition & 0 deletions examples/src/main/python/streaming/wordcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
conf.setAppName("PythonStreamingWordCount")
conf.set("spark.default.parallelism", 1)

# still has a bug
# ssc = StreamingContext(appName="PythonStreamingWordCount", duration=Seconds(1))
ssc = StreamingContext(conf=conf, duration=Seconds(1))

Expand Down

0 comments on commit d1ee6ca

Please sign in to comment.