Skip to content

Commit

Permalink
Merge branch 'master' into 202-release-candidate
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
  • Loading branch information
saif-ellafi committed Apr 29, 2019
2 parents c247b17 + 0977e10 commit 0991ad5
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Take a look at our official spark-nlp page: http://nlp.johnsnowlabs.com/ for use

## Slack community channel

Questions? Feedback? Request access by sending an email to nlp@johnsnowlabs.com
[Join Slack](https://join.slack.com/t/spark-nlp/shared_invite/enQtNjA4MTE2MDI1MDkxLTM4ZDliMjU5OWZmMDE1ZGVkMjg0MWFjMjU3NjY4YThlMTJkNmNjNjM3NTMwYzlhMWY4MGMzODI2NDBkOWU4ZDE)

## Table of contents

Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ <h2 class="title">High Performance NLP with Apache Spark </h2>
distributed
large scale environment.
</p>
<a class="btn btn-info btn-cta" style="float: center;margin-top: 10px;" href="mailto:nlp@johnsnowlabs.com?subject=SparkNLP%20Slack%20access" target="_blank"> Questions? Join our Slack</a>
<a class="btn btn-info btn-cta" style="float: center;margin-top: 10px;" href="https://join.slack.com/t/spark-nlp/shared_invite/enQtNjA4MTE2MDI1MDkxLTM4ZDliMjU5OWZmMDE1ZGVkMjg0MWFjMjU3NjY4YThlMTJkNmNjNjM3NTMwYzlhMWY4MGMzODI2NDBkOWU4ZDE" target="_blank"> Questions? Join our Slack</a>
<b/><p/><p/>
<p><span class="label label-warning">2019 March 23rd - Update!</span> 2.0.1 Released! Bert embeddings, embeddings as annotators, better OCR, new pretrained pipelines and much more!</p>
</div>
Expand Down
29 changes: 12 additions & 17 deletions src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package com.johnsnowlabs.ml.tensorflow

import java.lang.reflect.Modifier

import com.johnsnowlabs.ml.tensorflow.TensorResources.extractFloats
import com.johnsnowlabs.nlp.annotators.ner.Verbose

Expand All @@ -18,32 +16,29 @@ class TensorflowSpell(
val lossKey = "Add:0"
val dropoutRate = "dropout_rate"

/* returns the loss associated with the last word, given previous history */
def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = this.synchronized {

val packed = dataset.zip(cids).zip(cwids).map {
case ((_ids, _cids), _cwids) => Array(_ids, _cids, _cwids)
}
// these are the inputs to the graph
val wordIds = "batches:0"
val contextIds = "batches:1"
val contextWordIds = "batches:2"

val tensors = new TensorResources()
val inputTensor = tensors.createTensor(packed)
/* returns the loss associated with the last word, given previous history */
def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = {

tensorflow.getSession.runner
.feed(inMemoryInput, inputTensor)
.addTarget(testInitOp)
.run()
val tensors = new TensorResources

val lossWords = tensorflow.getSession.runner
.feed(dropoutRate, tensors.createTensor(1.0f))
.feed(wordIds, tensors.createTensor(dataset.map(_.dropRight(1))))
.feed(contextIds, tensors.createTensor(cids.map(_.tail)))
.feed(contextWordIds, tensors.createTensor(cwids.map(_.tail)))
.fetch(lossKey)
.fetch(validWords)
.run()

tensors.clearTensors()

val result = extractFloats(lossWords.get(0))
val width = inputTensor.shape()(2)
result.grouped(width.toInt - 1).map(_.last)

val width = dataset.head.length
result.grouped(width - 1).map(_.last)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import org.apache.hadoop.fs.FileSystem
import org.apache.spark.ml.Pipeline
import org.apache.spark.sql.SparkSession
import org.scalatest._
import SparkAccessor.spark
import spark.implicits._


class ContextSpellCheckerTestSpec extends FlatSpec {
Expand Down Expand Up @@ -107,9 +109,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {


"a Spell Checker" should "work in a pipeline with Tokenizer" in {
import SparkAccessor.spark
import spark.implicits._

val data = Seq("It was a cold , dreary day and the country was white with smow .",
"He wos re1uctant to clange .",
"he is gane .").toDF("text")
Expand Down Expand Up @@ -138,8 +137,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {

}



"a Spell Checker" should "work in a light pipeline" in {
import SparkAccessor.spark
import spark.implicits._
Expand All @@ -163,10 +160,7 @@ class ContextSpellCheckerTestSpec extends FlatSpec {

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, spellChecker)).fit(Seq.empty[String].toDF("text"))
val lp = new LightPipeline(pipeline)
lp.annotate(data)
lp.annotate(data)
lp.annotate(data)

lp.annotate(data ++ data ++ data)
}


Expand Down

0 comments on commit 0991ad5

Please sign in to comment.