From e1c5c4a4f06655fd2cfe5ea7b2107e7ec9f84467 Mon Sep 17 00:00:00 2001
From: Saif Addin Requirements & Setup
To start using the library, execute any of the following lines
depending on your desired use case:
spark-shell --packages JohnSnowLabs:spark-nlp:2.0.2
-pyspark --packages JohnSnowLabs:spark-nlp:2.0.2
-spark-submit --packages JohnSnowLabs:spark-nlp:2.0.2
+ spark-shell --packages JohnSnowLabs:spark-nlp:2.0.3
+pyspark --packages JohnSnowLabs:spark-nlp:2.0.3
+spark-submit --packages JohnSnowLabs:spark-nlp:2.0.3
Straight forward Python on jupyter notebook
Use pip to install (after you pip installed numpy and pyspark)
- pip install spark-nlp==2.0.2
+ pip install spark-nlp==2.0.3
jupyter notebook
The easiest way to get started, is to run the following code:
import sparknlp
@@ -131,21 +131,21 @@ Straight forward Python on jupyter notebook
.appName('OCR Eval') \
.config("spark.driver.memory", "6g") \
.config("spark.executor.memory", "6g") \
- .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.2") \
+ .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.3") \
.getOrCreate()
Databricks cloud cluster & Apache Zeppelin
Add the following maven coordinates in the dependency configuration page:
- com.johnsnowlabs.nlp:spark-nlp_2.11:2.0.2
+ com.johnsnowlabs.nlp:spark-nlp_2.11:2.0.3
For Python in Apache Zeppelin you may need to setup SPARK_SUBMIT_OPTIONS utilizing --packages instruction shown above like this
- export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:2.0.2"
+ export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:2.0.3"
Python Jupyter Notebook with PySpark
export SPARK_HOME=/path/to/your/spark/folder
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
-pyspark --packages JohnSnowLabs:spark-nlp:2.0.2
+pyspark --packages JohnSnowLabs:spark-nlp:2.0.3
S3 based standalone cluster (No Hadoop)
If your distributed storage is S3 and you don't have a standard hadoop configuration (i.e. fs.defaultFS)
@@ -442,7 +442,7 @@
Utilizing Spark NLP OCR Module
Spark NLP OCR Module is not included within Spark NLP. It is not an annotator and not an extension to Spark ML.
You can include it with the following coordinates for Maven:
- com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.2
+ com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.3
Creating Spark datasets from PDF (To be used with Spark NLP)
diff --git a/python/setup.py b/python/setup.py
index 664b00409988b0..cbaff2c2d28b92 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -40,7 +40,7 @@
# For a discussion on single-sourcing the version across setup.py and the
# project code, see
# https://packaging.python.org/en/latest/single_source_version.html
- version='2.0.2', # Required
+ version='2.0.3', # Required
# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
diff --git a/python/sparknlp/__init__.py b/python/sparknlp/__init__.py
index 29a949b3d06eb8..2411335ac6371e 100644
--- a/python/sparknlp/__init__.py
+++ b/python/sparknlp/__init__.py
@@ -36,8 +36,8 @@ def start(include_ocr=False):
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
if include_ocr:
- builder.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.2,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.2")
+ builder.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.3,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.3")
else:
- builder.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.2") \
+ builder.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.3") \
return builder.getOrCreate()
diff --git a/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala b/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala
index d8663b42346551..3cf12f78328b89 100644
--- a/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala
+++ b/src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala
@@ -12,9 +12,9 @@ object SparkNLP {
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
if (includeOcr) {
- build.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.2,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.2")
+ build.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.3,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.3")
} else {
- build.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.2")
+ build.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.3")
}
build.getOrCreate()
diff --git a/src/main/scala/com/johnsnowlabs/util/Build.scala b/src/main/scala/com/johnsnowlabs/util/Build.scala
index e236e79daf1fa7..203ad9a71ce922 100644
--- a/src/main/scala/com/johnsnowlabs/util/Build.scala
+++ b/src/main/scala/com/johnsnowlabs/util/Build.scala
@@ -11,6 +11,6 @@ object Build {
if (version != null && version.nonEmpty)
version
else
- "2.0.2"
+ "2.0.3"
}
}
\ No newline at end of file