From 1096cbb3678e094f29150aa9f9c14c6c1fecc0d2 Mon Sep 17 00:00:00 2001
From: Alberto
Date: Wed, 17 Apr 2019 15:24:04 -0300
Subject: [PATCH 1/2] fixed concurrent access to TF in spell checker

---
 .../ml/tensorflow/TensorflowSpell.scala       | 29 +++++++++----------
 .../context/ContextSpellCheckerTestSpec.scala | 12 ++------
 2 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala b/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
index de589c0539be26..d514c39e53e607 100644
--- a/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
+++ b/src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
@@ -16,30 +16,29 @@ class TensorflowSpell(
   val lossKey = "Add:0"
   val dropoutRate = "dropout_rate"
 
-  /* returns the loss associated with the last word, given previous history */
-  def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = this.synchronized {
-
-    val packed = dataset.zip(cids).zip(cwids).map {
-      case ((_ids, _cids), _cwids) => Array(_ids, _cids, _cwids)
-    }
+  // these are the inputs to the graph
+  val wordIds = "batches:0"
+  val contextIds = "batches:1"
+  val contextWordIds = "batches:2"
 
-    val tensors = new TensorResources()
-    val inputTensor = tensors.createTensor(packed)
+  /* returns the loss associated with the last word, given previous history */
+  def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = {
 
-    tensorflow.session.runner
-      .feed(inMemoryInput, inputTensor)
-      .addTarget(testInitOp)
-      .run()
+    val tensors = new TensorResources
 
     val lossWords = tensorflow.session.runner
       .feed(dropoutRate, tensors.createTensor(1.0f))
+      .feed(wordIds, tensors.createTensor(dataset.map(_.dropRight(1))))
+      .feed(contextIds, tensors.createTensor(cids.map(_.tail)))
+      .feed(contextWordIds, tensors.createTensor(cwids.map(_.tail)))
       .fetch(lossKey)
       .fetch(validWords)
       .run()
 
-    val result = extractFloats(lossWords.get(0))
-    val width = inputTensor.shape()(2)
-    result.grouped(width.toInt - 1).map(_.last)
+    tensors.clearTensors()
+    val result = extractFloats(lossWords.get(0))
+    val width = dataset.head.length
+    result.grouped(width - 1).map(_.last)
   }
 
 }
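A note on the hunk above: the removed path staged every batch through shared graph state (the `inMemoryInput` feed plus the `testInitOp` target), so two threads calling `predict` could overwrite each other's staged batch, and the whole method had to hide behind `this.synchronized`. The new path feeds a fresh tensor per call straight into the named graph inputs (`batches:0`, `batches:1`, `batches:2`), so each `session.runner` invocation is self-contained and the lock can be dropped. A minimal sketch of that pattern against the raw TensorFlow Java API follows; `scoreBatch` is an illustrative name, not the project's API, and for brevity it feeds only one of the three inputs:

```scala
import java.nio.FloatBuffer
import org.tensorflow.{Session, Tensor}

// Illustrative sketch (assumed helper, not spark-nlp's API): score a batch
// by feeding a per-call tensor. Nothing is shared between calls, so many
// threads may use one Session concurrently without this.synchronized.
// The real predict also feeds batches:1, batches:2 and dropout_rate.
def scoreBatch(session: Session, wordIds: Array[Array[Int]]): Array[Float] = {
  val input = Tensor.create(wordIds)   // fresh tensor per call, never shared
  try {
    val out = session.runner()
      .feed("batches:0", input)        // named graph input, as in the patch
      .fetch("Add:0")                  // the loss tensor, as in the patch
      .run()
      .get(0)
    try {
      val buf = FloatBuffer.allocate(out.numElements())
      out.writeTo(buf)                 // shape-agnostic float extraction
      buf.array()
    } finally out.close()
  } finally input.close()              // release native memory promptly
}
```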
diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala
index e09acd3d2af43f..7f1662f9e622ea 100644
--- a/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala
+++ b/src/test/scala/com/johnsnowlabs/nlp/annotators/spell/context/ContextSpellCheckerTestSpec.scala
@@ -6,6 +6,8 @@ import com.johnsnowlabs.nlp.annotators.spell.context.parser._
 import com.johnsnowlabs.nlp.{Annotation, DocumentAssembler, LightPipeline, SparkAccessor}
 import org.apache.spark.ml.Pipeline
 import org.scalatest._
+import SparkAccessor.spark
+import spark.implicits._
 
 class ContextSpellCheckerTestSpec extends FlatSpec {
 
@@ -63,9 +65,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {
 
   "a Spell Checker" should "work in a pipeline with Tokenizer" in {
-    import SparkAccessor.spark
-    import spark.implicits._
-
     val data = Seq("It was a cold , dreary day and the country was white with smow .",
       "He wos re1uctant to clange .",
       "he is gane .").toDF("text")
 
@@ -94,8 +93,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {
 
   }
-
-
 
   "a Spell Checker" should "work in a light pipeline" in {
     import SparkAccessor.spark
     import spark.implicits._
@@ -119,10 +116,7 @@ class ContextSpellCheckerTestSpec extends FlatSpec {
 
     val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, spellChecker)).fit(Seq.empty[String].toDF("text"))
 
     val lp = new LightPipeline(pipeline)
-    lp.annotate(data)
-    lp.annotate(data)
-    lp.annotate(data)
-
+    lp.annotate(data ++ data ++ data)
   }
 
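One remark on the test change above: the three sequential `lp.annotate(data)` calls become a single call over a tripled input, which still pushes repeated batches through the same session. Since the patch is about concurrent access, a stress variant could also hit the shared session from several threads at once; a hedged sketch reusing `lp` and `data` from the test (hypothetical test code, using the parallel collections built into Scala 2.12):

```scala
// Hypothetical stress check: drive one LightPipeline (one TF session)
// from eight threads at once; with per-call tensors no lock is needed.
val runs = (1 to 8).par.map(_ => lp.annotate(data ++ data ++ data))
assert(runs.forall(_.nonEmpty))
```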

From 0f84d5ee433485f1711ff0928d2dee9614d2008e Mon Sep 17 00:00:00 2001
From: Saif Addin
Date: Fri, 26 Apr 2019 10:59:14 -0300
Subject: [PATCH 2/2] Open Slack

---
 README.md       | 2 +-
 docs/index.html | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 7d83d4ebd9e8fe..78fc45a07f8b9e 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ Take a look at our official spark-nlp page: http://nlp.johnsnowlabs.com/ for use
 
 ## Slack community channel
 
-Questions? Feedback? Request access sending an email to nlp@johnsnowlabs.com
+[Join Slack](https://join.slack.com/t/spark-nlp/shared_invite/enQtNjA4MTE2MDI1MDkxLTM4ZDliMjU5OWZmMDE1ZGVkMjg0MWFjMjU3NjY4YThlMTJkNmNjNjM3NTMwYzlhMWY4MGMzODI2NDBkOWU4ZDE)
 
 ## Table of contents
 
diff --git a/docs/index.html b/docs/index.html
index 112e45dba3e54d..49e32770586c8a 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -76,7 +76,7 @@
 High Performance NLP with Apache Spark
 
 distributed large scale environment.
 
-Questions? Join our Slack
+Questions? Join our Slack
 
 2019 March 23rd - Update! 2.0.1 Released! Bert embeddings, embeddings as annotators, better OCR, new pretrained pipelines and much more!