From 1096cbb3678e094f29150aa9f9c14c6c1fecc0d2 Mon Sep 17 00:00:00 2001
From: Alberto
Date: Wed, 17 Apr 2019 15:24:04 -0300
Subject: [PATCH 1/2] Fix concurrent access to TF session in spell checker
---
.../ml/tensorflow/TensorflowSpell.scala | 29 +++++++++----------
.../context/ContextSpellCheckerTestSpec.scala | 12 ++------
2 files changed, 17 insertions(+), 24 deletions(-)
diff --git a/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala b/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
index de589c0539be26..d514c39e53e607 100644
--- a/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
+++ b/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
@@ -16,30 +16,29 @@ class TensorflowSpell(
val lossKey = "Add:0"
val dropoutRate = "dropout_rate"
- /* returns the loss associated with the last word, given previous history */
- def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = this.synchronized {
-
- val packed = dataset.zip(cids).zip(cwids).map {
- case ((_ids, _cids), _cwids) => Array(_ids, _cids, _cwids)
- }
+ // these are the inputs to the graph
+ val wordIds = "batches:0"
+ val contextIds = "batches:1"
+ val contextWordIds = "batches:2"
- val tensors = new TensorResources()
- val inputTensor = tensors.createTensor(packed)
+ /* returns the loss associated with the last word, given previous history */
+ def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = {
- tensorflow.session.runner
- .feed(inMemoryInput, inputTensor)
- .addTarget(testInitOp)
- .run()
+ val tensors = new TensorResources
val lossWords = tensorflow.session.runner
.feed(dropoutRate, tensors.createTensor(1.0f))
+ .feed(wordIds, tensors.createTensor(dataset.map(_.dropRight(1))))
+ .feed(contextIds, tensors.createTensor(cids.map(_.tail)))
+ .feed(contextWordIds, tensors.createTensor(cwids.map(_.tail)))
.fetch(lossKey)
.fetch(validWords)
.run()
- val result = extractFloats(lossWords.get(0))
- val width = inputTensor.shape()(2)
- result.grouped(width.toInt - 1).map(_.last)
+ tensors.clearTensors()
+ val result = extractFloats(lossWords.get(0))
+ val width = dataset.head.length
+ result.grouped(width - 1).map(_.last)
}
}
diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala
index e09acd3d2af43f..7f1662f9e622ea 100644
--- a/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala
+++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala
@@ -6,6 +6,8 @@ import com.johnsnowlabs.nlp.annotators.spell.context.parser._
import com.johnsnowlabs.nlp.{Annotation, DocumentAssembler, LightPipeline, SparkAccessor}
import org.apache.spark.ml.Pipeline
import org.scalatest._
+import SparkAccessor.spark
+import spark.implicits._
class ContextSpellCheckerTestSpec extends FlatSpec {
@@ -63,9 +65,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {
"a Spell Checker" should "work in a pipeline with Tokenizer" in {
- import SparkAccessor.spark
- import spark.implicits._
-
val data = Seq("It was a cold , dreary day and the country was white with smow .",
"He wos re1uctant to clange .",
"he is gane .").toDF("text")
@@ -94,8 +93,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {
}
-
-
"a Spell Checker" should "work in a light pipeline" in {
import SparkAccessor.spark
import spark.implicits._
@@ -119,10 +116,7 @@ class ContextSpellCheckerTestSpec extends FlatSpec {
val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, spellChecker)).fit(Seq.empty[String].toDF("text"))
val lp = new LightPipeline(pipeline)
- lp.annotate(data)
- lp.annotate(data)
- lp.annotate(data)
-
+ lp.annotate(data ++ data ++ data)
}
From 0f84d5ee433485f1711ff0928d2dee9614d2008e Mon Sep 17 00:00:00 2001
From: Saif Addin
Date: Fri, 26 Apr 2019 10:59:14 -0300
Subject: [PATCH 2/2] Open Slack
---
README.md | 2 +-
docs/index.html | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 7d83d4ebd9e8fe..78fc45a07f8b9e 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ Take a look at our official spark-nlp page: http://nlp.johnsnowlabs.com/ for use
## Slack community channel
-Questions? Feedback? Request access sending an email to nlp@johnsnowlabs.com
+[Join Slack](https://join.slack.com/t/spark-nlp/shared_invite/enQtNjA4MTE2MDI1MDkxLTM4ZDliMjU5OWZmMDE1ZGVkMjg0MWFjMjU3NjY4YThlMTJkNmNjNjM3NTMwYzlhMWY4MGMzODI2NDBkOWU4ZDE)
## Table of contents
diff --git a/docs/index.html b/docs/index.html
index 112e45dba3e54d..49e32770586c8a 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -76,7 +76,7 @@ High Performance NLP with Apache Spark
distributed
large scale environment.
- Questions? Join our Slack
+ Questions? Join our Slack
2019 March 23rd - Update! 2.0.1 Released! Bert embeddings, embeddings as annotators, better OCR, new pretrained pipelines and much more!