Skip to content

Commit

Permalink
Merge branch 'master' into 202-release-candidate
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
  • Loading branch information
saif-ellafi committed Apr 29, 2019
2 parents c247b17 + 0977e10 commit 0991ad5
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Take a look at our official spark-nlp page: http://nlp.johnsnowlabs.com/ for use

## Slack community channel

Questions? Feedback? Request access by sending an email to nlp@johnsnowlabs.com
[Join Slack](https://join.slack.com/t/spark-nlp/shared_invite/enQtNjA4MTE2MDI1MDkxLTM4ZDliMjU5OWZmMDE1ZGVkMjg0MWFjMjU3NjY4YThlMTJkNmNjNjM3NTMwYzlhMWY4MGMzODI2NDBkOWU4ZDE)

## Table of contents

Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ <h2 class="title">High Performance NLP with Apache Spark </h2>
distributed
large scale environment.
</p>
<a class="btn btn-info btn-cta" style="float: center;margin-top: 10px;" href="mailto:nlp@johnsnowlabs.com?subject=SparkNLP%20Slack%20access" target="_blank"> Questions? Join our Slack</a>
<a class="btn btn-info btn-cta" style="float: center;margin-top: 10px;" href="https://join.slack.com/t/spark-nlp/shared_invite/enQtNjA4MTE2MDI1MDkxLTM4ZDliMjU5OWZmMDE1ZGVkMjg0MWFjMjU3NjY4YThlMTJkNmNjNjM3NTMwYzlhMWY4MGMzODI2NDBkOWU4ZDE" target="_blank"> Questions? Join our Slack</a>
<b/><p/><p/>
<p><span class="label label-warning">2019 March 23rd - Update!</span> 2.0.1 Released! Bert embeddings, embeddings as annotators, better OCR, new pretrained pipelines and much more!</p>
</div>
Expand Down
29 changes: 12 additions & 17 deletions src/main/scala/com/johnsnowlabs/ml/tensorflow/TensorflowSpell.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package com.johnsnowlabs.ml.tensorflow

import java.lang.reflect.Modifier

import com.johnsnowlabs.ml.tensorflow.TensorResources.extractFloats
import com.johnsnowlabs.nlp.annotators.ner.Verbose

Expand All @@ -18,32 +16,29 @@ class TensorflowSpell(
val lossKey = "Add:0"
val dropoutRate = "dropout_rate"

/* returns the loss associated with the last word, given previous history */
def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = this.synchronized {

val packed = dataset.zip(cids).zip(cwids).map {
case ((_ids, _cids), _cwids) => Array(_ids, _cids, _cwids)
}
// these are the inputs to the graph
val wordIds = "batches:0"
val contextIds = "batches:1"
val contextWordIds = "batches:2"

val tensors = new TensorResources()
val inputTensor = tensors.createTensor(packed)
/* returns the loss associated with the last word, given previous history */
def predict(dataset: Array[Array[Int]], cids: Array[Array[Int]], cwids:Array[Array[Int]]) = {

tensorflow.getSession.runner
.feed(inMemoryInput, inputTensor)
.addTarget(testInitOp)
.run()
val tensors = new TensorResources

val lossWords = tensorflow.getSession.runner
.feed(dropoutRate, tensors.createTensor(1.0f))
.feed(wordIds, tensors.createTensor(dataset.map(_.dropRight(1))))
.feed(contextIds, tensors.createTensor(cids.map(_.tail)))
.feed(contextWordIds, tensors.createTensor(cwids.map(_.tail)))
.fetch(lossKey)
.fetch(validWords)
.run()

tensors.clearTensors()

val result = extractFloats(lossWords.get(0))
val width = inputTensor.shape()(2)
result.grouped(width.toInt - 1).map(_.last)

val width = dataset.head.length
result.grouped(width - 1).map(_.last)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import org.apache.hadoop.fs.FileSystem
import org.apache.spark.ml.Pipeline
import org.apache.spark.sql.SparkSession
import org.scalatest._
import SparkAccessor.spark
import spark.implicits._


class ContextSpellCheckerTestSpec extends FlatSpec {
Expand Down Expand Up @@ -107,9 +109,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {


"a Spell Checker" should "work in a pipeline with Tokenizer" in {
import SparkAccessor.spark
import spark.implicits._

val data = Seq("It was a cold , dreary day and the country was white with smow .",
"He wos re1uctant to clange .",
"he is gane .").toDF("text")
Expand Down Expand Up @@ -138,8 +137,6 @@ class ContextSpellCheckerTestSpec extends FlatSpec {

}



"a Spell Checker" should "work in a light pipeline" in {
import SparkAccessor.spark
import spark.implicits._
Expand All @@ -163,10 +160,7 @@ class ContextSpellCheckerTestSpec extends FlatSpec {

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, spellChecker)).fit(Seq.empty[String].toDF("text"))
val lp = new LightPipeline(pipeline)
lp.annotate(data)
lp.annotate(data)
lp.annotate(data)

lp.annotate(data ++ data ++ data)
}


Expand Down

0 comments on commit 0991ad5

Please sign in to comment.