diff --git a/README.md b/README.md index 612890d2b0f7c8..90f6e7194e1c33 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Take a look at our official spark-nlp page: http://nlp.johnsnowlabs.com/ for use ## Slack community channel -Questions? Feedback? Request access sending an email to nlp@johnsnowlabs.com +[Join Slack](https://join.slack.com/t/spark-nlp/shared_invite/enQtNjA4MTE2MDI1MDkxLTM4ZDliMjU5OWZmMDE1ZGVkMjg0MWFjMjU3NjY4YThlMTJkNmNjNjM3NTMwYzlhMWY4MGMzODI2NDBkOWU4ZDE) ## Table of contents diff --git a/docs/index.html b/docs/index.html index 112e45dba3e54d..49e32770586c8a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -76,7 +76,7 @@

High Performance NLP with Apache Spark

distributed large scale environment.

- Questions? Join our Slack + Questions? Join our Slack

2019 March 23rd - Update! 2.0.1 Released! Bert embeddings, embeddings as annotators, better OCR, new pretrained pipelines and much more!

diff --git a/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala b/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala index a20a1f23afc762..9a7a84d098d1bc 100644 --- a/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala +++ b/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala @@ -1,7 +1,5 @@ package com.johnsnowlabs.ml.tensorflow -import java.lang.reflect.Modifier - import com.johnsnowlabs.ml.tensorflow.TensorResources.extractFloats import com.johnsnowlabs.nlp.annotators.ner.Verbose @@ -18,23 +16,21 @@ class TensorflowSpell( val lossKey = "Add:0" val dropoutRate = "dropout_rate" - /* returns the loss associated with the last word, given previous history */ - def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = this.synchronized { - - val packed = dataset.zip(cids).zip(cwids).map { - case ((_ids, _cids), _cwids) => Array(_ids, _cids, _cwids) - } + // these are the inputs to the graph + val wordIds = "batches:0" + val contextIds = "batches:1" + val contextWordIds = "batches:2" - val tensors = new TensorResources() - val inputTensor = tensors.createTensor(packed) + /* returns the loss associated with the last word, given previous history */ + def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = { - tensorflow.getSession.runner - .feed(inMemoryInput, inputTensor) - .addTarget(testInitOp) - .run() + val tensors = new TensorResources val lossWords = tensorflow.getSession.runner .feed(dropoutRate, tensors.createTensor(1.0f)) + .feed(wordIds, tensors.createTensor(dataset.map(_.dropRight(1)))) + .feed(contextIds, tensors.createTensor(cids.map(_.tail))) + .feed(contextWordIds, tensors.createTensor(cwids.map(_.tail))) .fetch(lossKey) .fetch(validWords) .run() @@ -42,8 +38,7 @@ class TensorflowSpell( tensors.clearTensors() val result = extractFloats(lossWords.get(0)) - val width = inputTensor.shape()(2) - result.grouped(width.toInt - 1).map(_.last) - + val width = dataset.head.length + result.grouped(width - 1).map(_.last) } } diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala index d8c19be5d2ee5d..58d391eac9e293 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala @@ -13,6 +13,8 @@ import org.apache.hadoop.fs.FileSystem import org.apache.spark.ml.Pipeline import org.apache.spark.sql.SparkSession import org.scalatest._ +import SparkAccessor.spark +import spark.implicits._ class ContextSpellCheckerTestSpec extends FlatSpec { @@ -107,9 +109,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec { "a Spell Checker" should "work in a pipeline with Tokenizer" in { - import SparkAccessor.spark - import spark.implicits._ - val data = Seq("It was a cold , dreary day and the country was white with smow .", "He wos re1uctant to clange .", "he is gane .").toDF("text") @@ -138,8 +137,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec { } - - "a Spell Checker" should "work in a light pipeline" in { import SparkAccessor.spark import spark.implicits._ @@ -163,10 +160,7 @@ class ContextSpellCheckerTestSpec extends FlatSpec { val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, spellChecker)).fit(Seq.empty[String].toDF("text")) val lp = new LightPipeline(pipeline) - lp.annotate(data) - lp.annotate(data) - lp.annotate(data) - + lp.annotate(data ++ data ++ data) }