From 439ba66e3bedd6245a29874da8ecfc5463599e8f Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 21 Jan 2023 14:22:57 +0100 Subject: [PATCH 01/26] Added doc similarity ranker annotator template --- .gitignore | 1 + .../com/johnsnowlabs/nlp/AnnotatorType.scala | 2 +- .../similarity/DocumentSimilarityRanker.scala | 85 +++++++++++++++++++ .../DocumentSimilarityRankerTestSpec.scala | 74 ++++++++++++++++ 4 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRanker.scala create mode 100644 src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala diff --git a/.gitignore b/.gitignore index fdb9a78394ce68..09dc65b7b1069a 100644 --- a/.gitignore +++ b/.gitignore @@ -331,3 +331,4 @@ src/*/resources/*.classes .bsp/sbt.json python/docs/_build/** python/docs/reference/_autosummary/** +/noninttokens/ diff --git a/src/main/scala/com/johnsnowlabs/nlp/AnnotatorType.scala b/src/main/scala/com/johnsnowlabs/nlp/AnnotatorType.scala index 6a51a15b9e83cd..7e420f7f65eb43 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/AnnotatorType.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/AnnotatorType.scala @@ -38,5 +38,5 @@ object AnnotatorType { val NODE = "node" val TABLE = "table" val DUMMY = "dummy" - + val DOC_SIMILARITY_RANKINGS = "doc_similarity_rankings" } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRanker.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRanker.scala new file mode 100644 index 00000000000000..ea7cf934afd682 --- /dev/null +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRanker.scala @@ -0,0 +1,85 @@ +package com.johnsnowlabs.nlp.annotators.similarity + +import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} +import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate} +import 
org.apache.spark.ml.param.Param +import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} + +class DocumentSimilarityRanker(override val uid: String) + extends AnnotatorModel[DocumentSimilarityRanker] + with HasSimpleAnnotate[DocumentSimilarityRanker] { + +// setDefault( +// inputCols -> Array(SENTENCE_EMBEDDINGS), +// outputCol -> DOC_SIMILARITY_RANKINGS, +// similarityMethod -> "brp", +// numberOfNeighbours -> 10 +// ) + + def this() = this(Identifiable.randomUID("DOC_SIMILARITY_RANKER")) + + def setSimilarityMethod(simMethod: String): this.type = set(similarityMethod, simMethod) + + def getSimilarityMethod: String = $(similarityMethod) + + def setQuery(q: String): this.type = set(query, q) + + def getQuery: String = $(query) + + def setNumberOfNeighbours(nbOfNeighbours: Int): this.type = set(numberOfNeighbours, nbOfNeighbours) + + def getNumberOfNeighbours: Int = $(numberOfNeighbours) + + /** Choose how you would like to aggregate Word Embeddings to Sentence Embeddings (Default: + * `"AVERAGE"`). Can either be `"AVERAGE"` or `"SUM"`. + * + * @group param + */ + val numberOfNeighbours = new Param[Int]( + this, + "numberOfNeighbours", + "Choose how you would like to aggregate Word Embeddings to Sentence Embeddings: AVERAGE or SUM") + + /** Choose how you would like to aggregate Word Embeddings to Sentence Embeddings (Default: + * `"AVERAGE"`). Can either be `"AVERAGE"` or `"SUM"`. + * + * @group param + */ + val similarityMethod = new Param[String]( + this, + "similarityMethod", + "Choose how you would like to aggregate Word Embeddings to Sentence Embeddings: AVERAGE or SUM") + + /** Choose how you would like to aggregate Word Embeddings to Sentence Embeddings (Default: + * `"AVERAGE"`). Can either be `"AVERAGE"` or `"SUM"`. 
+ * + * @group param + */ + val query = new Param[String]( + this, + "query", + "Choose how you would like to aggregate Word Embeddings to Sentence Embeddings: AVERAGE or SUM") + + + /** takes a document and annotations and produces new annotations of this annotator's annotation + * type + * + * @param annotations + * Annotations that correspond to inputAnnotationCols generated by previous annotators if any + * @return + * any number of annotations processed for every input annotation. Not necessary one to one + * relationship + */ + override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = { + println("into the annotator") + Seq.empty + } + + /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator + * type + */ + override val inputAnnotatorTypes: Array[AnnotatorType] = Array(SENTENCE_EMBEDDINGS) + override val outputAnnotatorType: AnnotatorType = DOC_SIMILARITY_RANKINGS +} + +object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRanker] \ No newline at end of file diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala new file mode 100644 index 00000000000000..bbda321ea982eb --- /dev/null +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -0,0 +1,74 @@ +package com.johnsnowlabs.nlp.similarity + +import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} +import com.johnsnowlabs.nlp.annotators.Tokenizer +import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector +import com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRanker +import com.johnsnowlabs.nlp.base.DocumentAssembler +import com.johnsnowlabs.nlp.embeddings.SentenceEmbeddings +import com.johnsnowlabs.nlp.util.io.ResourceHelper +import com.johnsnowlabs.nlp.{AnnotatorBuilder, EmbeddingsFinisher} +import 
com.johnsnowlabs.tags.SlowTest +import org.apache.spark.ml.Pipeline +import org.apache.spark.sql.SparkSession +import org.scalatest.flatspec.AnyFlatSpec + +class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { + val spark: SparkSession = ResourceHelper.spark + + "DocumentSimilarityRanker" should "should rank document similarity" taggedAs SlowTest in { + val smallCorpus = ResourceHelper.spark.read + .option("header", "true") + .csv("src/test/resources/embeddings/sentence_embeddings.csv") + + val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + val sentence = new SentenceDetector() + .setInputCols("document") + .setOutputCol("sentence") + + val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + + val embeddings = AnnotatorBuilder + .getGLoveEmbeddings(smallCorpus) + .setInputCols("document", "token") + .setOutputCol("embeddings") + .setCaseSensitive(false) + + val embeddingsSentence = new SentenceEmbeddings() + .setInputCols(Array("document", "embeddings")) + .setOutputCol("sentence_embeddings") + .setPoolingStrategy("AVERAGE") + + val sentenceFinisher = new EmbeddingsFinisher() + .setInputCols("sentence_embeddings") + .setOutputCols("finished_sentence_embeddings") + .setCleanAnnotations(false) + + val similarityRanker = new DocumentSimilarityRanker() + .setInputCols("sentence_embeddings") + .setOutputCol(DOC_SIMILARITY_RANKINGS) + .setSimilarityMethod("brp") + .setQuery("ciao") + .setNumberOfNeighbours(10) + + val pipeline = new Pipeline() + .setStages( + Array( + documentAssembler, + sentence, + tokenizer, + embeddings, + embeddingsSentence, + sentenceFinisher, + similarityRanker)) + + val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) + + pipelineDF.show + } +} \ No newline at end of file From fda156cc64e72c956897d859fbd092865cf7291a Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Thu, 16 Feb 2023 21:40:45 +0100 Subject: [PATCH 02/26] Created ranker 
model --- .../DocumentSimilarityRankerApproach.scala | 21 +++++++++ ...la => DocumentSimilarityRankerModel.scala} | 44 ++++++------------- .../DocumentSimilarityRankerTestSpec.scala | 7 +-- 3 files changed, 39 insertions(+), 33 deletions(-) create mode 100644 src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala rename src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/{DocumentSimilarityRanker.scala => DocumentSimilarityRankerModel.scala} (67%) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala new file mode 100644 index 00000000000000..9011e9ff290fed --- /dev/null +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -0,0 +1,21 @@ +package com.johnsnowlabs.nlp.annotators.similarity + +import com.johnsnowlabs.nlp.{AnnotatorApproach, ParamsAndFeaturesWritable} +import org.apache.spark.ml.PipelineModel +import org.apache.spark.sql.Dataset + + +class DocumentSimilarityRankerApproach(override val uid: String) +extends AnnotatorApproach[DocumentSimilarityRankerModel] +with ParamsAndFeaturesWritable +{ + override val description: AnnotatorType = ??? + + override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = ??? + + override val outputAnnotatorType: AnnotatorType = ??? + /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator + * type + */ + override val inputAnnotatorTypes: Array[AnnotatorType] = ??? 
+} diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRanker.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala similarity index 67% rename from src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRanker.scala rename to src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index ea7cf934afd682..a388231008fe15 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRanker.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -1,20 +1,17 @@ package com.johnsnowlabs.nlp.annotators.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} -import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate} +import com.johnsnowlabs.nlp.{Annotation, AnnotatorApproach, AnnotatorModel, HasSimpleAnnotate} import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} -class DocumentSimilarityRanker(override val uid: String) - extends AnnotatorModel[DocumentSimilarityRanker] - with HasSimpleAnnotate[DocumentSimilarityRanker] { +class DocumentSimilarityRankerModel(override val uid: String) + extends AnnotatorModel[DocumentSimilarityRankerModel] + with HasSimpleAnnotate[DocumentSimilarityRankerModel] { -// setDefault( -// inputCols -> Array(SENTENCE_EMBEDDINGS), -// outputCol -> DOC_SIMILARITY_RANKINGS, -// similarityMethod -> "brp", -// numberOfNeighbours -> 10 -// ) + override val inputAnnotatorTypes: Array[AnnotatorType] = Array(SENTENCE_EMBEDDINGS) + + override val outputAnnotatorType: AnnotatorType = DOC_SIMILARITY_RANKINGS def this() = this(Identifiable.randomUID("DOC_SIMILARITY_RANKER")) @@ -22,10 +19,6 @@ class DocumentSimilarityRanker(override val uid: String) def getSimilarityMethod: String = $(similarityMethod) - def setQuery(q: 
String): this.type = set(query, q) - - def getQuery: String = $(query) - def setNumberOfNeighbours(nbOfNeighbours: Int): this.type = set(numberOfNeighbours, nbOfNeighbours) def getNumberOfNeighbours: Int = $(numberOfNeighbours) @@ -50,16 +43,12 @@ class DocumentSimilarityRanker(override val uid: String) "similarityMethod", "Choose how you would like to aggregate Word Embeddings to Sentence Embeddings: AVERAGE or SUM") - /** Choose how you would like to aggregate Word Embeddings to Sentence Embeddings (Default: - * `"AVERAGE"`). Can either be `"AVERAGE"` or `"SUM"`. - * - * @group param - */ - val query = new Param[String]( - this, - "query", - "Choose how you would like to aggregate Word Embeddings to Sentence Embeddings: AVERAGE or SUM") - + setDefault( + inputCols -> Array(SENTENCE_EMBEDDINGS), + outputCol -> DOC_SIMILARITY_RANKINGS, + similarityMethod -> "brp", + numberOfNeighbours -> 10 + ) /** takes a document and annotations and produces new annotations of this annotator's annotation * type @@ -75,11 +64,6 @@ class DocumentSimilarityRanker(override val uid: String) Seq.empty } - /** Annotator reference id. 
Used to identify elements in metadata or to refer to this annotator - * type - */ - override val inputAnnotatorTypes: Array[AnnotatorType] = Array(SENTENCE_EMBEDDINGS) - override val outputAnnotatorType: AnnotatorType = DOC_SIMILARITY_RANKINGS } -object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRanker] \ No newline at end of file +object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRankerModel] \ No newline at end of file diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index bbda321ea982eb..014dc760f22602 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -3,7 +3,7 @@ package com.johnsnowlabs.nlp.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} import com.johnsnowlabs.nlp.annotators.Tokenizer import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector -import com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRanker +import com.johnsnowlabs.nlp.annotators.similarity.{DocumentSimilarityRanker, DocumentSimilarityRankerModel} import com.johnsnowlabs.nlp.base.DocumentAssembler import com.johnsnowlabs.nlp.embeddings.SentenceEmbeddings import com.johnsnowlabs.nlp.util.io.ResourceHelper @@ -49,11 +49,10 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setOutputCols("finished_sentence_embeddings") .setCleanAnnotations(false) - val similarityRanker = new DocumentSimilarityRanker() + val similarityRanker = new DocumentSimilarityRankerModel() .setInputCols("sentence_embeddings") .setOutputCol(DOC_SIMILARITY_RANKINGS) .setSimilarityMethod("brp") - .setQuery("ciao") .setNumberOfNeighbours(10) val pipeline = new Pipeline() @@ -69,6 +68,8 @@ class 
DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) + pipelineDF.printSchema pipelineDF.show + } } \ No newline at end of file From e3e7c01c298ae8a674d545f0cfe40cf03fe98986 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Thu, 16 Feb 2023 21:51:22 +0100 Subject: [PATCH 03/26] gitignore modified --- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 09dc65b7b1069a..aa264460d82d79 100644 --- a/.gitignore +++ b/.gitignore @@ -293,6 +293,9 @@ venv.bak/ # mypy .mypy_cache/ +# VS Code +**/.vscode + ### SBT ### # Simple Build Tool # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control @@ -324,6 +327,7 @@ python/tensorflow/bert/models/** **/tmp_* docs/_site/** docs/.sass-cache/** +docs/*.json tst_shortcut_sd/ src/*/resources/*.classes /word_segmenter_metrics/ @@ -331,4 +335,6 @@ src/*/resources/*.classes .bsp/sbt.json python/docs/_build/** python/docs/reference/_autosummary/** -/noninttokens/ + +# MS Visio Code +**/.vscode/ From ee81d37d2d82cdda93d1116cc0bbe200b37fc177 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Mon, 27 Feb 2023 22:00:56 +0100 Subject: [PATCH 04/26] Added params to LSH models --- .../DocumentSimilarityRankerApproach.scala | 76 +++++++++++++++++-- .../DocumentSimilarityRankerModel.scala | 64 +++++++++++----- .../DocumentSimilarityRankerTestSpec.scala | 4 +- 3 files changed, 118 insertions(+), 26 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 9011e9ff290fed..6dba362fe0b494 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -1,21 +1,85 @@ 
package com.johnsnowlabs.nlp.annotators.similarity -import com.johnsnowlabs.nlp.{AnnotatorApproach, ParamsAndFeaturesWritable} +import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} +import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} +import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel +import org.apache.spark.ml.param.Param import org.apache.spark.sql.Dataset - class DocumentSimilarityRankerApproach(override val uid: String) extends AnnotatorApproach[DocumentSimilarityRankerModel] -with ParamsAndFeaturesWritable -{ + with HasStorageRef + with HasEnableCachingProperties { override val description: AnnotatorType = ??? - override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = ??? - override val outputAnnotatorType: AnnotatorType = ??? /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator * type */ override val inputAnnotatorTypes: Array[AnnotatorType] = ??? + + /** The similarity method used to calculate the neighbours. + * (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) + * + * @group param + */ + val similarityMethod = new Param[String]( + this, + "similarityMethod", + """The similarity method used to calculate the neighbours. + |(Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) + |""".stripMargin) + + def setSimilarityMethod(value: String): this.type = set(similarityMethod, value) + + def getSimilarityMethod: String = $(similarityMethod) + + /** The number of neighbours the model will return (Default:`"10"`). 
+ * + * @group param + */ + val numberOfNeighbours = new Param[Int]( + this, + "numberOfNeighbours", + """The number of neighbours the model will return (Default:`"10"`)""") + + def setNumberOfNeighbours(value: Int): this.type = set(numberOfNeighbours, value) + + def getNumberOfNeighbours: Int = $(numberOfNeighbours) + + val bucketLength = new Param[Double]( + this, + "bucketLength", + """The bucket length that controls the average size of hash buckets. + |A larger bucket length (i.e., fewer buckets) increases the probability of features being hashed + |to the same bucket (increasing the numbers of true and false positives) + |""".stripMargin) + + def setBucketLength(value: Double): this.type = set(bucketLength, value) + + def getBucketLength: Double = $(bucketLength) + + val numHashTables = new Param[Int]( + this, + "numHashTables", + """number of hash tables, where increasing number of hash tables lowers the false negative rate, + |and decreasing it improves the running performance. + |""".stripMargin) + + def setNumHashTables(value: Int): this.type = set(numHashTables, value) + + def getNumHashTables: Int = $(numHashTables) + + setDefault( + inputCols -> Array(SENTENCE_EMBEDDINGS), + outputCol -> DOC_SIMILARITY_RANKINGS, + similarityMethod -> "brp", + numberOfNeighbours -> 10, + bucketLength -> 2.0, + numHashTables -> 3 + ) + + override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = ??? 
+ } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index a388231008fe15..b7368fae44181b 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -1,7 +1,7 @@ package com.johnsnowlabs.nlp.annotators.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} -import com.johnsnowlabs.nlp.{Annotation, AnnotatorApproach, AnnotatorModel, HasSimpleAnnotate} +import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate} import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} @@ -15,39 +15,64 @@ class DocumentSimilarityRankerModel(override val uid: String) def this() = this(Identifiable.randomUID("DOC_SIMILARITY_RANKER")) - def setSimilarityMethod(simMethod: String): this.type = set(similarityMethod, simMethod) + /** The similarity method used to calculate the neighbours. + * (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) + * + * @group param + */ + val similarityMethod = new Param[String]( + this, + "similarityMethod", + """The similarity method used to calculate the neighbours. + |(Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) + |""".stripMargin) + def setSimilarityMethod(value: String): this.type = set(similarityMethod, value) def getSimilarityMethod: String = $(similarityMethod) - def setNumberOfNeighbours(nbOfNeighbours: Int): this.type = set(numberOfNeighbours, nbOfNeighbours) - - def getNumberOfNeighbours: Int = $(numberOfNeighbours) - - /** Choose how you would like to aggregate Word Embeddings to Sentence Embeddings (Default: - * `"AVERAGE"`). Can either be `"AVERAGE"` or `"SUM"`. 
+ /** The number of neighbours the model will return (Default:`"10"`). * * @group param */ val numberOfNeighbours = new Param[Int]( this, "numberOfNeighbours", - "Choose how you would like to aggregate Word Embeddings to Sentence Embeddings: AVERAGE or SUM") + """The number of neighbours the model will return (Default:`"10"`)""") - /** Choose how you would like to aggregate Word Embeddings to Sentence Embeddings (Default: - * `"AVERAGE"`). Can either be `"AVERAGE"` or `"SUM"`. - * - * @group param - */ - val similarityMethod = new Param[String]( + def setNumberOfNeighbours(value: Int): this.type = set(numberOfNeighbours, value) + + def getNumberOfNeighbours: Int = $(numberOfNeighbours) + + val bucketLength = new Param[Double]( this, - "similarityMethod", - "Choose how you would like to aggregate Word Embeddings to Sentence Embeddings: AVERAGE or SUM") + "bucketLength", + """The bucket length that controls the average size of hash buckets. + |A larger bucket length (i.e., fewer buckets) increases the probability of features being hashed + |to the same bucket (increasing the numbers of true and false positives) + |""".stripMargin) + + def setBucketLength(value: Double): this.type = set(bucketLength, value) + + def getBucketLength: Double = $(bucketLength) + + val numHashTables = new Param[Int]( + this, + "numHashTables", + """number of hash tables, where increasing number of hash tables lowers the false negative rate, + |and decreasing it improves the running performance. 
+ |""".stripMargin) + + def setNumHashTables(value: Int): this.type = set(numHashTables, value) + + def getNumHashTables: Int = $(numHashTables) setDefault( inputCols -> Array(SENTENCE_EMBEDDINGS), outputCol -> DOC_SIMILARITY_RANKINGS, similarityMethod -> "brp", - numberOfNeighbours -> 10 + numberOfNeighbours -> 10, + bucketLength -> 2.0, + numHashTables -> 3 ) /** takes a document and annotations and produces new annotations of this annotator's annotation @@ -60,6 +85,9 @@ class DocumentSimilarityRankerModel(override val uid: String) * relationship */ override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = { + // for each sentence embedding annotation we use the trained DocumentSimilarityRankerApproach\ + // to produce the top-N ANN of it + // FIXME reporting distance spread ? println("into the annotator") Seq.empty } diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 014dc760f22602..c0ffee1040c955 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -49,7 +49,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setOutputCols("finished_sentence_embeddings") .setCleanAnnotations(false) - val similarityRanker = new DocumentSimilarityRankerModel() + val docSimilarityRanker = new DocumentSimilarityRankerModel() .setInputCols("sentence_embeddings") .setOutputCol(DOC_SIMILARITY_RANKINGS) .setSimilarityMethod("brp") @@ -64,7 +64,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { embeddings, embeddingsSentence, sentenceFinisher, - similarityRanker)) + docSimilarityRanker)) val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) From b5b9a632aef96acbf0f282128019b89b6320e9ca Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Tue, 28 Feb 2023 
22:37:58 +0100 Subject: [PATCH 05/26] Added BRP LSH as annotator engine --- .../DocumentSimilarityRankerApproach.scala | 24 ++++++++++++++++++- .../DocumentSimilarityRankerModel.scala | 10 +++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 6dba362fe0b494..ad52982d755fce 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -4,6 +4,7 @@ import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMB import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel +import org.apache.spark.ml.feature.BucketedRandomProjectionLSH import org.apache.spark.ml.param.Param import org.apache.spark.sql.Dataset @@ -80,6 +81,27 @@ extends AnnotatorApproach[DocumentSimilarityRankerModel] numHashTables -> 3 ) - override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = ??? 
+ val INPUT_COL_FEATURES = "features" + + val OUTPUT_COL_HASHES = "hashes" + + override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = { + val lsh = $(similarityMethod) match { + case "brp" => new BucketedRandomProjectionLSH() + .setBucketLength($(bucketLength)) + .setNumHashTables($(numHashTables)) + .setInputCol(INPUT_COL_FEATURES) + .setOutputCol(OUTPUT_COL_HASHES) + case _ => throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") + } + + val model = lsh.fit(dataset) + + new DocumentSimilarityRankerModel() + .setBucketLength($(bucketLength)) + .setNumHashTables($(numHashTables)) + .setNumberOfNeighbours($(numberOfNeighbours)) + .setLSHModel(model) + } } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index b7368fae44181b..0f55e1db28e866 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -2,6 +2,7 @@ package com.johnsnowlabs.nlp.annotators.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate} +import org.apache.spark.ml.feature.BucketedRandomProjectionLSHModel import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} @@ -66,6 +67,13 @@ class DocumentSimilarityRankerModel(override val uid: String) def getNumHashTables: Int = $(numHashTables) + val similarityModel = new Param[BucketedRandomProjectionLSHModel]( + this, + "similarityModel", "similarityModel LSH based") + def setLSHModel(value: BucketedRandomProjectionLSHModel): this.type = set(similarityModel, value) + + def getLSHModel: 
BucketedRandomProjectionLSHModel = $(similarityModel) + setDefault( inputCols -> Array(SENTENCE_EMBEDDINGS), outputCol -> DOC_SIMILARITY_RANKINGS, @@ -89,7 +97,7 @@ class DocumentSimilarityRankerModel(override val uid: String) // to produce the top-N ANN of it // FIXME reporting distance spread ? println("into the annotator") - Seq.empty + Seq(Annotation("ciao")) } } From 9053be47fb9c3a7cf6143d1d1d2f43af19062f57 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Tue, 28 Feb 2023 23:11:46 +0100 Subject: [PATCH 06/26] Added replace features col with embeddings --- .../similarity/DocumentSimilarityRankerApproach.scala | 7 ++++++- .../similarity/DocumentSimilarityRankerModel.scala | 11 ++++++++--- .../similarity/DocumentSimilarityRankerTestSpec.scala | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index ad52982d755fce..c69548ec7a8bd3 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -7,6 +7,7 @@ import org.apache.spark.ml.PipelineModel import org.apache.spark.ml.feature.BucketedRandomProjectionLSH import org.apache.spark.ml.param.Param import org.apache.spark.sql.Dataset +import org.apache.spark.sql.functions.col class DocumentSimilarityRankerApproach(override val uid: String) extends AnnotatorApproach[DocumentSimilarityRankerModel] @@ -95,7 +96,11 @@ extends AnnotatorApproach[DocumentSimilarityRankerModel] case _ => throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") } - val model = lsh.fit(dataset) + val embeddingsDataset = dataset.withColumn("features", col("sentence_embeddings.embeddings")) + + embeddingsDataset.show() + + val model = 
lsh.fit(embeddingsDataset) new DocumentSimilarityRankerModel() .setBucketLength($(bucketLength)) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index 0f55e1db28e866..d3c65cad818100 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -93,11 +93,16 @@ class DocumentSimilarityRankerModel(override val uid: String) * relationship */ override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = { - // for each sentence embedding annotation we use the trained DocumentSimilarityRankerApproach\ - // to produce the top-N ANN of it - // FIXME reporting distance spread ? println("into the annotator") Seq(Annotation("ciao")) + +// iterate over input annotations +// - select the input ID based on result input text? 
+// - replace features value with embeddings +// - calculate the N closest IDs with their distances +// - insert closest and distance in as metadata arrays of N=numNeighbors +// - forward the embeddings in the metadata embeddings +// $(similarityModel).transform() } } diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index c0ffee1040c955..e4048380858e80 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -69,7 +69,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) pipelineDF.printSchema - pipelineDF.show + pipelineDF.show(false) } } \ No newline at end of file From 7dd00bb4bba7de72c193d029c1f5d186d4d47182 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Thu, 2 Mar 2023 22:03:44 +0100 Subject: [PATCH 07/26] Added LSH logic on vector cast --- .../DocumentSimilarityRankerApproach.scala | 54 ++++++++----- .../DocumentSimilarityRankerModel.scala | 75 ++++++++++++++----- .../DocumentSimilarityRankerTestSpec.scala | 10 ++- 3 files changed, 98 insertions(+), 41 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index c69548ec7a8bd3..e1a2429b349f37 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -4,22 +4,27 @@ import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMB import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} import 
com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel -import org.apache.spark.ml.feature.BucketedRandomProjectionLSH +import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, VectorAssembler} +import org.apache.spark.ml.functions.array_to_vector import org.apache.spark.ml.param.Param +import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.Dataset -import org.apache.spark.sql.functions.col +import org.apache.spark.sql.functions.{col, expr, flatten} class DocumentSimilarityRankerApproach(override val uid: String) -extends AnnotatorApproach[DocumentSimilarityRankerModel] - with HasStorageRef - with HasEnableCachingProperties { - override val description: AnnotatorType = ??? + extends AnnotatorApproach[DocumentSimilarityRankerModel] + with HasStorageRef + with HasEnableCachingProperties { + + override val description: AnnotatorType = "LSH based document similarity annotator" - override val outputAnnotatorType: AnnotatorType = ??? /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator * type */ - override val inputAnnotatorTypes: Array[AnnotatorType] = ??? + def this() = this(Identifiable.randomUID("DocumentSimilarityRankerApproach")) + + override val inputAnnotatorTypes: Array[AnnotatorType] = Array(SENTENCE_EMBEDDINGS) + override val outputAnnotatorType: AnnotatorType = DOC_SIMILARITY_RANKINGS /** The similarity method used to calculate the neighbours. 
* (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) @@ -82,31 +87,40 @@ extends AnnotatorApproach[DocumentSimilarityRankerModel] numHashTables -> 3 ) - val INPUT_COL_FEATURES = "features" + val LSH_INPUT_COL_NAME = "features" - val OUTPUT_COL_HASHES = "hashes" + val LSH_OUTPUT_COL_NAME = "hashes" override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = { val lsh = $(similarityMethod) match { case "brp" => new BucketedRandomProjectionLSH() .setBucketLength($(bucketLength)) .setNumHashTables($(numHashTables)) - .setInputCol(INPUT_COL_FEATURES) - .setOutputCol(OUTPUT_COL_HASHES) + .setInputCol(LSH_INPUT_COL_NAME) + .setOutputCol(LSH_OUTPUT_COL_NAME) case _ => throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") } - val embeddingsDataset = dataset.withColumn("features", col("sentence_embeddings.embeddings")) + val embeddingsDataset = dataset.withColumn(LSH_INPUT_COL_NAME, col("sentence_embeddings.embeddings")) + embeddingsDataset.select(LSH_INPUT_COL_NAME).show(false) - embeddingsDataset.show() + val lshDataset = embeddingsDataset + .withColumn(s"$LSH_INPUT_COL_NAME", flatten(col(s"$LSH_INPUT_COL_NAME"))) + .withColumn(s"$LSH_INPUT_COL_NAME", array_to_vector(col(s"$LSH_INPUT_COL_NAME"))) + // .select(expr(s"transform($LSH_INPUT_COL_NAME, x -> x[0])").as(s"$LSH_INPUT_COL_NAME")) + lshDataset.show(false) - val model = lsh.fit(embeddingsDataset) + val model = lsh.fit(lshDataset) + + val datasetMf = Map("lshDataset" -> lshDataset) + val modelMf = Map("similarityModel" -> model) new DocumentSimilarityRankerModel() - .setBucketLength($(bucketLength)) - .setNumHashTables($(numHashTables)) - .setNumberOfNeighbours($(numberOfNeighbours)) - .setLSHModel(model) + .setLshInputColName(LSH_INPUT_COL_NAME) + .setLshBucketLength($(bucketLength)) + .setLshNumHashTables($(numHashTables)) + .setLshNumberOfNeighbours($(numberOfNeighbours)) + .setSimilarityModel(modelMf) + 
.setDataset(datasetMf) } - } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index d3c65cad818100..8f2f3eacc24712 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -1,14 +1,22 @@ package com.johnsnowlabs.nlp.annotators.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} -import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate} -import org.apache.spark.ml.feature.BucketedRandomProjectionLSHModel +import com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties +import com.johnsnowlabs.nlp.serialization.MapFeature +import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate, ParamsAndFeaturesWritable} +import com.johnsnowlabs.storage.HasStorageRef +import org.apache.hadoop.shaded.org.eclipse.jetty.websocket.common.frames.DataFrame +import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel} import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} +import org.apache.spark.sql.{Dataset, SparkSession} class DocumentSimilarityRankerModel(override val uid: String) extends AnnotatorModel[DocumentSimilarityRankerModel] - with HasSimpleAnnotate[DocumentSimilarityRankerModel] { + with HasSimpleAnnotate[DocumentSimilarityRankerModel] + with HasStorageRef + with HasEmbeddingsProperties + with ParamsAndFeaturesWritable { override val inputAnnotatorTypes: Array[AnnotatorType] = Array(SENTENCE_EMBEDDINGS) @@ -16,6 +24,17 @@ class DocumentSimilarityRankerModel(override val uid: String) def this() = this(Identifiable.randomUID("DOC_SIMILARITY_RANKER")) + val lshInputColName = new Param[String]( + this, + 
"similarityMethod", + """ Input col name for similarity LSH model. + |(Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) + |""".stripMargin) + + def setLshInputColName(value: String): this.type = set(lshInputColName, value) + + def getLshInputColName: String = $(lshInputColName) + /** The similarity method used to calculate the neighbours. * (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) * @@ -27,6 +46,7 @@ class DocumentSimilarityRankerModel(override val uid: String) """The similarity method used to calculate the neighbours. |(Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) |""".stripMargin) + def setSimilarityMethod(value: String): this.type = set(similarityMethod, value) def getSimilarityMethod: String = $(similarityMethod) @@ -40,7 +60,7 @@ class DocumentSimilarityRankerModel(override val uid: String) "numberOfNeighbours", """The number of neighbours the model will return (Default:`"10"`)""") - def setNumberOfNeighbours(value: Int): this.type = set(numberOfNeighbours, value) + def setLshNumberOfNeighbours(value: Int): this.type = set(numberOfNeighbours, value) def getNumberOfNeighbours: Int = $(numberOfNeighbours) @@ -52,7 +72,7 @@ class DocumentSimilarityRankerModel(override val uid: String) |to the same bucket (increasing the numbers of true and false positives) |""".stripMargin) - def setBucketLength(value: Double): this.type = set(bucketLength, value) + def setLshBucketLength(value: Double): this.type = set(bucketLength, value) def getBucketLength: Double = $(bucketLength) @@ -63,16 +83,28 @@ class DocumentSimilarityRankerModel(override val uid: String) |and decreasing it improves the running performance. 
|""".stripMargin) - def setNumHashTables(value: Int): this.type = set(numHashTables, value) + def setLshNumHashTables(value: Int): this.type = set(numHashTables, value) def getNumHashTables: Int = $(numHashTables) - val similarityModel = new Param[BucketedRandomProjectionLSHModel]( - this, - "similarityModel", "similarityModel LSH based") - def setLSHModel(value: BucketedRandomProjectionLSHModel): this.type = set(similarityModel, value) + val similarityModel: MapFeature[String, BucketedRandomProjectionLSHModel] = new MapFeature(this, "similarityModel") - def getLSHModel: BucketedRandomProjectionLSHModel = $(similarityModel) + def setSimilarityModel(value: Map[String, BucketedRandomProjectionLSHModel]): this.type = set(similarityModel, value) + + def generateDefaultLSHModel(value: String): BucketedRandomProjectionLSHModel = { + value match { + case "brp" => new BucketedRandomProjectionLSH().fit(getDataset) + } + } + + def getSimilarityModel: BucketedRandomProjectionLSHModel = + $$(similarityModel).getOrElse("similarityModel", generateDefaultLSHModel("brp")) + + val dataset: MapFeature[String, Dataset[_]] = new MapFeature(this, "dataset") + + def setDataset(value: Map[String, Dataset[_]]): this.type = set(dataset, value) + + def getDataset: Dataset[_] = $$(dataset).getOrElse("dataset", null) setDefault( inputCols -> Array(SENTENCE_EMBEDDINGS), @@ -96,13 +128,20 @@ class DocumentSimilarityRankerModel(override val uid: String) println("into the annotator") Seq(Annotation("ciao")) -// iterate over input annotations -// - select the input ID based on result input text? -// - replace features value with embeddings -// - calculate the N closest IDs with their distances -// - insert closest and distance in as metadata arrays of N=numNeighbors -// - forward the embeddings in the metadata embeddings -// $(similarityModel).transform() + // iterate over input annotations + // - select the input ID based on result input text? 
+ // - replace features value with embeddings + // - calculate the N closest IDs with their distances + // - insert closest and distance in as metadata arrays of N=numNeighbors + // - forward the embeddings in the metadata embeddings + // $(similarityModel).transform() + +// val _transformed = getSimilarityModel.transform(getDataset) + +// val retrieved = $$(similarityModel).getOrElse("similarityModel", null) + + println("Transformed") + Seq.empty } } diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index e4048380858e80..a788d3a05109fa 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -3,7 +3,7 @@ package com.johnsnowlabs.nlp.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} import com.johnsnowlabs.nlp.annotators.Tokenizer import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector -import com.johnsnowlabs.nlp.annotators.similarity.{DocumentSimilarityRanker, DocumentSimilarityRankerModel} +import com.johnsnowlabs.nlp.annotators.similarity.{DocumentSimilarityRanker, DocumentSimilarityRankerApproach, DocumentSimilarityRankerModel} import com.johnsnowlabs.nlp.base.DocumentAssembler import com.johnsnowlabs.nlp.embeddings.SentenceEmbeddings import com.johnsnowlabs.nlp.util.io.ResourceHelper @@ -49,12 +49,14 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setOutputCols("finished_sentence_embeddings") .setCleanAnnotations(false) - val docSimilarityRanker = new DocumentSimilarityRankerModel() + val docSimilarityRanker = new DocumentSimilarityRankerApproach() .setInputCols("sentence_embeddings") .setOutputCol(DOC_SIMILARITY_RANKINGS) .setSimilarityMethod("brp") .setNumberOfNeighbours(10) + // val docSimilarityFinalizer + 
val pipeline = new Pipeline() .setStages( Array( @@ -64,7 +66,9 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { embeddings, embeddingsSentence, sentenceFinisher, - docSimilarityRanker)) + docSimilarityRanker +// docSimilarityFinalizer + )) val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) From 06a29403f53da111489801ad72c20ad2477bf648 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Fri, 3 Mar 2023 23:17:04 +0100 Subject: [PATCH 08/26] Added skeleton for lsh doc sim ranker - WIP --- .../DocumentSimilarityRankerApproach.scala | 50 ++++--- .../DocumentSimilarityRankerModel.scala | 131 ++++-------------- 2 files changed, 62 insertions(+), 119 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index e1a2429b349f37..4d7c165dc3d2f1 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -4,12 +4,14 @@ import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMB import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel -import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, VectorAssembler} +import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel, VectorAssembler} import org.apache.spark.ml.functions.array_to_vector +import org.apache.spark.ml.linalg.{DenseVector, Vectors} import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.Identifiable -import org.apache.spark.sql.Dataset -import org.apache.spark.sql.functions.{col, expr, flatten} +import org.apache.spark.sql.expressions.Window +import org.apache.spark.sql.{DataFrame, 
Dataset} +import org.apache.spark.sql.functions.{col, expr, flatten, hash, monotonically_increasing_id, row_number} class DocumentSimilarityRankerApproach(override val uid: String) extends AnnotatorApproach[DocumentSimilarityRankerModel] @@ -91,6 +93,19 @@ class DocumentSimilarityRankerApproach(override val uid: String) val LSH_OUTPUT_COL_NAME = "hashes" + val INDEX_COL_NAME = "index" + + def getANN(model: BucketedRandomProjectionLSHModel, query: (Int, DenseVector), similarityDataset: DataFrame) = { + query match { + case (index, key) => + val similarRankedDocs = model.approxNearestNeighbors(similarityDataset, key, getNumberOfNeighbours) + val neighborsStr = similarRankedDocs.select(INDEX_COL_NAME).collect().map(_.getInt(0)).mkString("|") + index.toString.concat("=>").concat(neighborsStr) + } + } + + val INPUT_EMBEDDINGS = "sentence_embeddings.embeddings" + override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = { val lsh = $(similarityMethod) match { case "brp" => new BucketedRandomProjectionLSH() @@ -101,26 +116,27 @@ class DocumentSimilarityRankerApproach(override val uid: String) case _ => throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") } - val embeddingsDataset = dataset.withColumn(LSH_INPUT_COL_NAME, col("sentence_embeddings.embeddings")) - embeddingsDataset.select(LSH_INPUT_COL_NAME).show(false) + val embeddingsDataset = dataset.withColumn(LSH_INPUT_COL_NAME, col(INPUT_EMBEDDINGS)) - val lshDataset = embeddingsDataset + val similarityDataset: DataFrame = embeddingsDataset .withColumn(s"$LSH_INPUT_COL_NAME", flatten(col(s"$LSH_INPUT_COL_NAME"))) .withColumn(s"$LSH_INPUT_COL_NAME", array_to_vector(col(s"$LSH_INPUT_COL_NAME"))) - // .select(expr(s"transform($LSH_INPUT_COL_NAME, x -> x[0])").as(s"$LSH_INPUT_COL_NAME")) - lshDataset.show(false) - val model = lsh.fit(lshDataset) + val model = lsh.fit(similarityDataset) + + val similarityDatasetWithIndex = 
similarityDataset +// .withColumn(INDEX_COL_NAME, row_number.over(Window.orderBy(monotonically_increasing_id)) - 1) + .withColumn(INDEX_COL_NAME, hash(col("text"))) + + val indexedVectorTuples = similarityDatasetWithIndex + .select(INDEX_COL_NAME, LSH_INPUT_COL_NAME) + .rdd + .map(x => (x.getAs(INDEX_COL_NAME), x.getAs(LSH_INPUT_COL_NAME))).collect() - val datasetMf = Map("lshDataset" -> lshDataset) - val modelMf = Map("similarityModel" -> model) + val similarityMappings: Array[String] = indexedVectorTuples + .map(query => getANN(model, query, similarityDatasetWithIndex)) new DocumentSimilarityRankerModel() - .setLshInputColName(LSH_INPUT_COL_NAME) - .setLshBucketLength($(bucketLength)) - .setLshNumHashTables($(numHashTables)) - .setLshNumberOfNeighbours($(numberOfNeighbours)) - .setSimilarityModel(modelMf) - .setDataset(datasetMf) + .setSimilarityMappings(Map("similarityMappings" -> similarityMappings)) } } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index 8f2f3eacc24712..d45fc4d1c1a6ad 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -5,11 +5,9 @@ import com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties import com.johnsnowlabs.nlp.serialization.MapFeature import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate, ParamsAndFeaturesWritable} import com.johnsnowlabs.storage.HasStorageRef -import org.apache.hadoop.shaded.org.eclipse.jetty.websocket.common.frames.DataFrame -import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel} -import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} -import org.apache.spark.sql.{Dataset, SparkSession} 
+ +import scala.util.hashing.MurmurHash3 class DocumentSimilarityRankerModel(override val uid: String) extends AnnotatorModel[DocumentSimilarityRankerModel] @@ -24,95 +22,20 @@ class DocumentSimilarityRankerModel(override val uid: String) def this() = this(Identifiable.randomUID("DOC_SIMILARITY_RANKER")) - val lshInputColName = new Param[String]( - this, - "similarityMethod", - """ Input col name for similarity LSH model. - |(Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) - |""".stripMargin) - - def setLshInputColName(value: String): this.type = set(lshInputColName, value) - - def getLshInputColName: String = $(lshInputColName) - - /** The similarity method used to calculate the neighbours. - * (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) - * - * @group param - */ - val similarityMethod = new Param[String]( - this, - "similarityMethod", - """The similarity method used to calculate the neighbours. - |(Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) - |""".stripMargin) - - def setSimilarityMethod(value: String): this.type = set(similarityMethod, value) - - def getSimilarityMethod: String = $(similarityMethod) - - /** The number of neighbours the model will return (Default:`"10"`). + /** Dictionary of words with their vectors * * @group param */ - val numberOfNeighbours = new Param[Int]( - this, - "numberOfNeighbours", - """The number of neighbours the model will return (Default:`"10"`)""") - - def setLshNumberOfNeighbours(value: Int): this.type = set(numberOfNeighbours, value) - - def getNumberOfNeighbours: Int = $(numberOfNeighbours) - - val bucketLength = new Param[Double]( - this, - "bucketLength", - """The bucket length that controls the average size of hash buckets. 
- |A larger bucket length (i.e., fewer buckets) increases the probability of features being hashed - |to the same bucket (increasing the numbers of true and false positives) - |""".stripMargin) - - def setLshBucketLength(value: Double): this.type = set(bucketLength, value) + val similarityMappings: MapFeature[String, Array[String]] = new MapFeature(this, "similarityMappings") - def getBucketLength: Double = $(bucketLength) + /** @group setParam */ + def setSimilarityMappings(value: Map[String, Array[String]]): this.type = set(similarityMappings, value) - val numHashTables = new Param[Int]( - this, - "numHashTables", - """number of hash tables, where increasing number of hash tables lowers the false negative rate, - |and decreasing it improves the running performance. - |""".stripMargin) - - def setLshNumHashTables(value: Int): this.type = set(numHashTables, value) - - def getNumHashTables: Int = $(numHashTables) - - val similarityModel: MapFeature[String, BucketedRandomProjectionLSHModel] = new MapFeature(this, "similarityModel") - - def setSimilarityModel(value: Map[String, BucketedRandomProjectionLSHModel]): this.type = set(similarityModel, value) - - def generateDefaultLSHModel(value: String): BucketedRandomProjectionLSHModel = { - value match { - case "brp" => new BucketedRandomProjectionLSH().fit(getDataset) - } - } - - def getSimilarityModel: BucketedRandomProjectionLSHModel = - $$(similarityModel).getOrElse("similarityModel", generateDefaultLSHModel("brp")) - - val dataset: MapFeature[String, Dataset[_]] = new MapFeature(this, "dataset") - - def setDataset(value: Map[String, Dataset[_]]): this.type = set(dataset, value) - - def getDataset: Dataset[_] = $$(dataset).getOrElse("dataset", null) + def getSimilarityMappings: Array[String] = $$(similarityMappings).getOrElse("similarityMappings", Array("")) setDefault( inputCols -> Array(SENTENCE_EMBEDDINGS), - outputCol -> DOC_SIMILARITY_RANKINGS, - similarityMethod -> "brp", - numberOfNeighbours -> 10, - 
bucketLength -> 2.0, - numHashTables -> 3 + outputCol -> DOC_SIMILARITY_RANKINGS ) /** takes a document and annotations and produces new annotations of this annotator's annotation @@ -125,25 +48,29 @@ class DocumentSimilarityRankerModel(override val uid: String) * relationship */ override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = { - println("into the annotator") - Seq(Annotation("ciao")) - - // iterate over input annotations - // - select the input ID based on result input text? - // - replace features value with embeddings - // - calculate the N closest IDs with their distances - // - insert closest and distance in as metadata arrays of N=numNeighbors - // - forward the embeddings in the metadata embeddings - // $(similarityModel).transform() -// val _transformed = getSimilarityModel.transform(getDataset) - -// val retrieved = $$(similarityModel).getOrElse("similarityModel", null) - - println("Transformed") - Seq.empty + // 1=>0|1 + val mappings: Map[String, String] = getSimilarityMappings + .map(s => s.split("=>")) + .map { case Array(index, neighbors) => (index, neighbors) } + .toMap + + annotations.map( + annotation => { + val inputResult = annotation.result + val indexTarget = MurmurHash3.stringHash(inputResult, MurmurHash3.stringSeed).toString + val neighbors: String = mappings.getOrElse(indexTarget, "NA") + + Annotation( + annotatorType = outputAnnotatorType, + begin = annotation.begin, + end = annotation.end, + result = annotation.result, + metadata = annotation.metadata + ("id"-> indexTarget) + ("neighbors" -> neighbors) , + embeddings = annotation.embeddings) + } + ) } - } object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRankerModel] \ No newline at end of file From 8a91494deeb21ad2cc4a941053a8091fbc470a4b Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 4 Mar 2023 10:45:09 +0100 Subject: [PATCH 09/26] Fixed mh3 hash calculation --- .../DocumentSimilarityRankerApproach.scala | 21 
+++++++++++++------ .../DocumentSimilarityRankerModel.scala | 1 + .../DocumentSimilarityRankerTestSpec.scala | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 4d7c165dc3d2f1..b7465f689de5c6 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -10,8 +10,10 @@ import org.apache.spark.ml.linalg.{DenseVector, Vectors} import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.expressions.Window -import org.apache.spark.sql.{DataFrame, Dataset} -import org.apache.spark.sql.functions.{col, expr, flatten, hash, monotonically_increasing_id, row_number} +import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.functions.{col, expr, flatten, hash, monotonically_increasing_id, row_number, udf} + +import scala.util.hashing.MurmurHash3 class DocumentSimilarityRankerApproach(override val uid: String) extends AnnotatorApproach[DocumentSimilarityRankerModel] @@ -95,12 +97,12 @@ class DocumentSimilarityRankerApproach(override val uid: String) val INDEX_COL_NAME = "index" - def getANN(model: BucketedRandomProjectionLSHModel, query: (Int, DenseVector), similarityDataset: DataFrame) = { + def getANN(model: BucketedRandomProjectionLSHModel, query: (String, DenseVector), similarityDataset: DataFrame) = { query match { case (index, key) => val similarRankedDocs = model.approxNearestNeighbors(similarityDataset, key, getNumberOfNeighbours) - val neighborsStr = similarRankedDocs.select(INDEX_COL_NAME).collect().map(_.getInt(0)).mkString("|") - index.toString.concat("=>").concat(neighborsStr) + val neighborsStr = 
similarRankedDocs.select(INDEX_COL_NAME).collect().map(_.getString(0)).mkString("|") + index.concat("=>").concat(neighborsStr) } } @@ -124,9 +126,16 @@ class DocumentSimilarityRankerApproach(override val uid: String) val model = lsh.fit(similarityDataset) + val mh3UDF = udf{ + (s: String) => MurmurHash3.stringHash(s, MurmurHash3.stringSeed).toString + } +// SparkSession.builder().getOrCreate().udf.register("hashSimilarityIndex", hashUDF) + val similarityDatasetWithIndex = similarityDataset // .withColumn(INDEX_COL_NAME, row_number.over(Window.orderBy(monotonically_increasing_id)) - 1) - .withColumn(INDEX_COL_NAME, hash(col("text"))) + .withColumn(INDEX_COL_NAME, mh3UDF(col("text"))) + + similarityDatasetWithIndex.show val indexedVectorTuples = similarityDatasetWithIndex .select(INDEX_COL_NAME, LSH_INPUT_COL_NAME) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index d45fc4d1c1a6ad..0bfbcfe4186046 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -6,6 +6,7 @@ import com.johnsnowlabs.nlp.serialization.MapFeature import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate, ParamsAndFeaturesWritable} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} +import org.apache.spark.sql.functions.col import scala.util.hashing.MurmurHash3 diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index a788d3a05109fa..f915a63d0ed7bb 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ 
b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -53,7 +53,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setInputCols("sentence_embeddings") .setOutputCol(DOC_SIMILARITY_RANKINGS) .setSimilarityMethod("brp") - .setNumberOfNeighbours(10) + .setNumberOfNeighbours(1) // val docSimilarityFinalizer From 6c7c475d0c160e069a61ac532eacdedf11e993c2 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 4 Mar 2023 11:24:28 +0100 Subject: [PATCH 10/26] Fixed dataset assertions id vs neghbours --- .../DocumentSimilarityRankerApproach.scala | 7 +------ .../similarity/DocumentSimilarityRankerModel.scala | 1 - .../DocumentSimilarityRankerTestSpec.scala | 13 ++++++++++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index b7465f689de5c6..d1105ef7c9a10d 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -129,13 +129,8 @@ class DocumentSimilarityRankerApproach(override val uid: String) val mh3UDF = udf{ (s: String) => MurmurHash3.stringHash(s, MurmurHash3.stringSeed).toString } -// SparkSession.builder().getOrCreate().udf.register("hashSimilarityIndex", hashUDF) - val similarityDatasetWithIndex = similarityDataset -// .withColumn(INDEX_COL_NAME, row_number.over(Window.orderBy(monotonically_increasing_id)) - 1) - .withColumn(INDEX_COL_NAME, mh3UDF(col("text"))) - - similarityDatasetWithIndex.show + val similarityDatasetWithIndex = similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col("text"))) val indexedVectorTuples = similarityDatasetWithIndex .select(INDEX_COL_NAME, LSH_INPUT_COL_NAME) diff --git 
a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index 0bfbcfe4186046..810c54595c7c43 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -50,7 +50,6 @@ class DocumentSimilarityRankerModel(override val uid: String) */ override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = { - // 1=>0|1 val mappings: Map[String, String] = getSimilarityMappings .map(s => s.split("=>")) .map { case Array(index, neighbors) => (index, neighbors) } diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index f915a63d0ed7bb..5f3d1c0180bed5 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -17,9 +17,16 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val spark: SparkSession = ResourceHelper.spark "DocumentSimilarityRanker" should "should rank document similarity" taggedAs SlowTest in { - val smallCorpus = ResourceHelper.spark.read - .option("header", "true") - .csv("src/test/resources/embeddings/sentence_embeddings.csv") + + val smallCorpus = spark.createDataFrame( + List( + "First document, this is my first sentence. This is my second sentence.", + "Second document, this is my first sentence. 
This is my second sentence.", + "Third document, climate change is arguably one of the most pressing problems of our time.", + "Fourth document, Florence in Italy, is among the most beautiful cities in Europe.", + "Fifth document, The French Riviera is the Mediterranean coastline of the southeast corner of France.", + ).map(Tuple1(_))) + .toDF("text") val documentAssembler = new DocumentAssembler() .setInputCol("text") From b7742f0348b794e634a9b2846072204b2ca61860 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Mon, 6 Mar 2023 09:26:14 +0100 Subject: [PATCH 11/26] Converting neighbours result string to map --- .../DocumentSimilarityRankerApproach.scala | 54 ++++++++++++------- .../DocumentSimilarityRankerModel.scala | 22 ++++---- .../DocumentSimilarityRankerTestSpec.scala | 29 +++++++--- 3 files changed, 66 insertions(+), 39 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index d1105ef7c9a10d..1b2279c30330e7 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -28,8 +28,19 @@ class DocumentSimilarityRankerApproach(override val uid: String) def this() = this(Identifiable.randomUID("DocumentSimilarityRankerApproach")) override val inputAnnotatorTypes: Array[AnnotatorType] = Array(SENTENCE_EMBEDDINGS) + override val outputAnnotatorType: AnnotatorType = DOC_SIMILARITY_RANKINGS + val LSH_INPUT_COL_NAME = "features" + + val LSH_OUTPUT_COL_NAME = "hashes" + + val INDEX_COL_NAME = "index" + + val INPUT_EMBEDDINGS = "sentence_embeddings.embeddings" + + val TEXT = "text" + /** The similarity method used to calculate the neighbours. 
* (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) * @@ -91,24 +102,23 @@ class DocumentSimilarityRankerApproach(override val uid: String) numHashTables -> 3 ) - val LSH_INPUT_COL_NAME = "features" - - val LSH_OUTPUT_COL_NAME = "hashes" - - val INDEX_COL_NAME = "index" - - def getANN(model: BucketedRandomProjectionLSHModel, query: (String, DenseVector), similarityDataset: DataFrame) = { + def getNeighboursRankedDocIndexes(model: BucketedRandomProjectionLSHModel, + query: (Int, DenseVector), + similarityDataset: DataFrame) = { query match { - case (index, key) => - val similarRankedDocs = model.approxNearestNeighbors(similarityDataset, key, getNumberOfNeighbours) - val neighborsStr = similarRankedDocs.select(INDEX_COL_NAME).collect().map(_.getString(0)).mkString("|") - index.concat("=>").concat(neighborsStr) + case (index, queryVector) => + val similarRankedDocs = model.approxNearestNeighbors(similarityDataset, queryVector, getNumberOfNeighbours) + val neighboursRankedDocIndexes: Array[Int] = similarRankedDocs + .select(INDEX_COL_NAME) + .collect() + .map(_.getInt(0)) + + (index, neighboursRankedDocIndexes) } } - val INPUT_EMBEDDINGS = "sentence_embeddings.embeddings" - override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = { + val lsh = $(similarityMethod) match { case "brp" => new BucketedRandomProjectionLSH() .setBucketLength($(bucketLength)) @@ -126,21 +136,25 @@ class DocumentSimilarityRankerApproach(override val uid: String) val model = lsh.fit(similarityDataset) - val mh3UDF = udf{ - (s: String) => MurmurHash3.stringHash(s, MurmurHash3.stringSeed).toString + val mh3UDF = udf { + (s: String) => MurmurHash3.stringHash(s, MurmurHash3.stringSeed) } - val similarityDatasetWithIndex = similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col("text"))) + val similarityDatasetWithIndex = similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col(TEXT))) val indexedVectorTuples = 
similarityDatasetWithIndex .select(INDEX_COL_NAME, LSH_INPUT_COL_NAME) .rdd - .map(x => (x.getAs(INDEX_COL_NAME), x.getAs(LSH_INPUT_COL_NAME))).collect() + .map(x => (x.getAs[Int](INDEX_COL_NAME), x.getAs[DenseVector](LSH_INPUT_COL_NAME))) + .collect() - val similarityMappings: Array[String] = indexedVectorTuples - .map(query => getANN(model, query, similarityDatasetWithIndex)) + val similarityMappings: Map[Int, Array[Int]] = indexedVectorTuples + .map(query => getNeighboursRankedDocIndexes(model, query, similarityDatasetWithIndex)) + .toMap new DocumentSimilarityRankerModel() - .setSimilarityMappings(Map("similarityMappings" -> similarityMappings)) + .setSimilarityMappings( + Map("similarityMappings" -> similarityMappings) + ) } } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index 810c54595c7c43..522a1b2077af61 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -27,12 +27,12 @@ class DocumentSimilarityRankerModel(override val uid: String) * * @group param */ - val similarityMappings: MapFeature[String, Array[String]] = new MapFeature(this, "similarityMappings") + val similarityMappings: MapFeature[String, Map[Int, Array[Int]]] = new MapFeature(this, "similarityMappings") /** @group setParam */ - def setSimilarityMappings(value: Map[String, Array[String]]): this.type = set(similarityMappings, value) + def setSimilarityMappings(value: Map[String, Map[Int, Array[Int]]]): this.type = set(similarityMappings, value) - def getSimilarityMappings: Array[String] = $$(similarityMappings).getOrElse("similarityMappings", Array("")) + def getSimilarityMappings: Map[Int, Array[Int]] = $$(similarityMappings).getOrElse("similarityMappings", Map.empty) setDefault( 
inputCols -> Array(SENTENCE_EMBEDDINGS), @@ -48,29 +48,25 @@ class DocumentSimilarityRankerModel(override val uid: String) * any number of annotations processed for every input annotation. Not necessary one to one * relationship */ - override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = { - - val mappings: Map[String, String] = getSimilarityMappings - .map(s => s.split("=>")) - .map { case Array(index, neighbors) => (index, neighbors) } - .toMap + override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = annotations.map( annotation => { val inputResult = annotation.result - val indexTarget = MurmurHash3.stringHash(inputResult, MurmurHash3.stringSeed).toString - val neighbors: String = mappings.getOrElse(indexTarget, "NA") + val targetIndex = MurmurHash3.stringHash(inputResult, MurmurHash3.stringSeed) + val neighbors: Array[Int] = getSimilarityMappings.getOrElse(targetIndex, Array(-1)) // index NA Annotation( annotatorType = outputAnnotatorType, begin = annotation.begin, end = annotation.end, result = annotation.result, - metadata = annotation.metadata + ("id"-> indexTarget) + ("neighbors" -> neighbors) , + metadata = annotation.metadata + + ("lshId"-> targetIndex.toString) + + ("lshNeighbors" -> neighbors.mkString("[", ",", "]")) , embeddings = annotation.embeddings) } ) - } } object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRankerModel] \ No newline at end of file diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 5f3d1c0180bed5..9a429c5cd10b0c 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -1,9 +1,9 @@ package com.johnsnowlabs.nlp.similarity -import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, 
SENTENCE_EMBEDDINGS} +import com.johnsnowlabs.nlp.AnnotatorType.DOC_SIMILARITY_RANKINGS import com.johnsnowlabs.nlp.annotators.Tokenizer import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector -import com.johnsnowlabs.nlp.annotators.similarity.{DocumentSimilarityRanker, DocumentSimilarityRankerApproach, DocumentSimilarityRankerModel} +import com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRankerApproach import com.johnsnowlabs.nlp.base.DocumentAssembler import com.johnsnowlabs.nlp.embeddings.SentenceEmbeddings import com.johnsnowlabs.nlp.util.io.ResourceHelper @@ -11,8 +11,12 @@ import com.johnsnowlabs.nlp.{AnnotatorBuilder, EmbeddingsFinisher} import com.johnsnowlabs.tags.SlowTest import org.apache.spark.ml.Pipeline import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions.col import org.scalatest.flatspec.AnyFlatSpec +import scala.util.hashing.MurmurHash3 +import scala.util.parsing.json.JSON.parseFull + class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val spark: SparkSession = ResourceHelper.spark @@ -60,7 +64,12 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setInputCols("sentence_embeddings") .setOutputCol(DOC_SIMILARITY_RANKINGS) .setSimilarityMethod("brp") - .setNumberOfNeighbours(1) + .setNumberOfNeighbours(3) + +// val finisher = new DocumentSimilarityRankerFinisher() +// .setInputCols("sentence_embeddings") +// .setOutputCols("finished_sentence_embeddings") +// .setCleanAnnotations(false) // val docSimilarityFinalizer @@ -73,14 +82,22 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { embeddings, embeddingsSentence, sentenceFinisher, - docSimilarityRanker -// docSimilarityFinalizer + docSimilarityRanker, + // finisher )) val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) pipelineDF.printSchema - pipelineDF.show(false) + // pipelineDF.show(false) + pipelineDF.select(DOC_SIMILARITY_RANKINGS).show(false) + + // get text + val hashId = 
MurmurHash3.stringHash("First document, this is my first sentence. This is my second sentence.", MurmurHash3.stringSeed) + pipelineDF + .withColumn("lshId", col("doc_similarity_rankings.metadata").getItem("lshId")) + .withColumn("lshNeighbors", col("doc_similarity_rankings.metadata").getItem("lshId")) + .show } } \ No newline at end of file From 6969526cb1c2e9f0d146bfd9332164f39a866d39 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Tue, 7 Mar 2023 21:46:13 +0100 Subject: [PATCH 12/26] Added finisher to extract lsh id and neighbors --- .../DocumentSimilarityRankerFinisher.scala | 108 ++++++++++++++++++ .../DocumentSimilarityRankerTestSpec.scala | 32 ++---- 2 files changed, 118 insertions(+), 22 deletions(-) create mode 100644 src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala diff --git a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala new file mode 100644 index 00000000000000..0cf400a00f7235 --- /dev/null +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -0,0 +1,108 @@ +package com.johnsnowlabs.nlp.finisher + +import com.johnsnowlabs.nlp.AnnotatorType +import com.johnsnowlabs.nlp.util.FinisherUtil +import org.apache.spark.ml.Transformer +import org.apache.spark.ml.param.{ParamMap, StringArrayParam} +import org.apache.spark.ml.util.{DefaultParamsWritable, Identifiable} +import org.apache.spark.sql.functions.{col, element_at} +import org.apache.spark.sql.types.{ArrayType, FloatType, StructField, StructType} +import org.apache.spark.sql.{DataFrame, Dataset} + +case class DocumentSimilarityRankerFinisher (override val uid: String) + extends Transformer + with DefaultParamsWritable { + + def this() = this(Identifiable.randomUID("document_similarity_ranker_finisher")) + + val LSH_ID_COL_NAME = "lshId" + + val LSH_NEIGHBORS_COL_NAME = "lshNeighbors" + + /** Name of input 
annotation cols containing embeddings + * + * @group param + */ + val inputCols: StringArrayParam = + new StringArrayParam(this, "inputCols", "Name of input annotation cols containing similar documents") + + /** Name of input annotation cols containing similar documents + * + * @group setParam + */ + def setInputCols(value: Array[String]): this.type = set(inputCols, value) + + /** Name of input annotation cols containing similar documents + * + * @group setParam + */ + def setInputCols(value: String*): this.type = setInputCols(value.toArray) + + /** Name of DocumentSimilarityRankerFinisher output cols + * + * @group getParam + */ + def getInputCols: Array[String] = $(inputCols) + + /** Name of DocumentSimilarityRankerFinisher output cols + * + * @group param + */ + val outputCols: StringArrayParam = + new StringArrayParam(this, "outputCols", "Name of DocumentSimilarityRankerFinisher output cols") + + /** Name of DocumentSimilarityRankerFinisher output cols + * + * @group setParam + */ + def setOutputCols(value: Array[String]): this.type = set(outputCols, value) + + /** Name of DocumentSimilarityRankerFinisher output cols + * + * @group setParam + */ + def setOutputCols(value: String*): this.type = setOutputCols(value.toArray) + + /** Name of input annotation cols containing embeddings + * + * @group getParam + */ + def getOutputCols: Array[String] = get(outputCols).getOrElse(getInputCols.map("finished_" + _)) + + override def transform(dataset: Dataset[_]): DataFrame = { + val finisherBaseColName = "finished_doc_similarity_ranker" + dataset + .withColumn( + s"${finisherBaseColName}_id", + element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) + .getItem(LSH_ID_COL_NAME) + ) + .withColumn(s"${finisherBaseColName}_neighbors", + element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) + .getItem(LSH_NEIGHBORS_COL_NAME) + ) + } + + override def copy(extra: ParamMap): Transformer = defaultCopy(extra) + + override def 
transformSchema(schema: StructType): StructType = { + val documentSimilarityRankerAnnotators = Seq(AnnotatorType.DOC_SIMILARITY_RANKINGS) + + getInputCols.foreach { annotationColumn => + FinisherUtil.checkIfInputColsExist(getInputCols, schema) + FinisherUtil.checkIfAnnotationColumnIsSparkNLPAnnotation(schema, annotationColumn) + + /** Check if the annotationColumn has DocumentSimilarityRanker. It must be + * annotators: DocumentSimilarityRanker + */ + require( + documentSimilarityRankerAnnotators.contains( + schema(annotationColumn).metadata.getString("annotatorType")), + s"column [$annotationColumn] must be of type DocumentSimilarityRanker") + } + + val outputFields = schema.fields + + StructType(outputFields) + } +} diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 9a429c5cd10b0c..4c3c8fe913ac99 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -6,17 +6,14 @@ import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector import com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRankerApproach import com.johnsnowlabs.nlp.base.DocumentAssembler import com.johnsnowlabs.nlp.embeddings.SentenceEmbeddings +import com.johnsnowlabs.nlp.finisher.DocumentSimilarityRankerFinisher import com.johnsnowlabs.nlp.util.io.ResourceHelper import com.johnsnowlabs.nlp.{AnnotatorBuilder, EmbeddingsFinisher} import com.johnsnowlabs.tags.SlowTest import org.apache.spark.ml.Pipeline import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.functions.col import org.scalatest.flatspec.AnyFlatSpec -import scala.util.hashing.MurmurHash3 -import scala.util.parsing.json.JSON.parseFull - class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val spark: SparkSession = 
ResourceHelper.spark @@ -66,12 +63,9 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setSimilarityMethod("brp") .setNumberOfNeighbours(3) -// val finisher = new DocumentSimilarityRankerFinisher() -// .setInputCols("sentence_embeddings") -// .setOutputCols("finished_sentence_embeddings") -// .setCleanAnnotations(false) - - // val docSimilarityFinalizer + val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() + .setInputCols("doc_similarity_rankings") + .setOutputCols("finished_doc_similarity_rankings") val pipeline = new Pipeline() .setStages( @@ -83,21 +77,15 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { embeddingsSentence, sentenceFinisher, docSimilarityRanker, - // finisher + documentSimilarityFinisher )) val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) - pipelineDF.printSchema - // pipelineDF.show(false) - pipelineDF.select(DOC_SIMILARITY_RANKINGS).show(false) - - // get text - val hashId = MurmurHash3.stringHash("First document, this is my first sentence. 
This is my second sentence.", MurmurHash3.stringSeed) - - pipelineDF - .withColumn("lshId", col("doc_similarity_rankings.metadata").getItem("lshId")) - .withColumn("lshNeighbors", col("doc_similarity_rankings.metadata").getItem("lshId")) - .show + pipelineDF.select( + "finished_doc_similarity_ranker_id", + "text", + "finished_doc_similarity_ranker_neighbors" + ).show(false) } } \ No newline at end of file From d9706324989d07ba480fc4c898b0a24c2c1eb0c9 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Tue, 7 Mar 2023 22:28:31 +0100 Subject: [PATCH 13/26] Labels refactoring --- .../DocumentSimilarityRankerFinisher.scala | 20 ++++++++++++++++--- .../DocumentSimilarityRankerTestSpec.scala | 18 ++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala index 0cf400a00f7235..80d93440b81b92 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -19,6 +19,10 @@ case class DocumentSimilarityRankerFinisher (override val uid: String) val LSH_NEIGHBORS_COL_NAME = "lshNeighbors" + val FINISHED_DOC_SIM_RANKER_ID_DEFAULT = "finished_doc_similarity_rankings_id" + + val FINISHED_DOC_SIM_RANKER_NEIGHBORS_DEFAULT = "finished_doc_similarity_rankings_neighbors" + /** Name of input annotation cols containing embeddings * * @group param @@ -70,14 +74,24 @@ case class DocumentSimilarityRankerFinisher (override val uid: String) def getOutputCols: Array[String] = get(outputCols).getOrElse(getInputCols.map("finished_" + _)) override def transform(dataset: Dataset[_]): DataFrame = { - val finisherBaseColName = "finished_doc_similarity_ranker" + + require(getOutputCols.length == 1 || getOutputCols.length == 2, + "Output column array should have length 1 (default case) or 2 
when value id and neighbors are assigned.") + + val (idColName, neighborsColName) = + getOutputCols.length match { + case 1 => (FINISHED_DOC_SIM_RANKER_ID_DEFAULT, FINISHED_DOC_SIM_RANKER_NEIGHBORS_DEFAULT) + case 2 => (getOutputCols(0), getOutputCols(1)) + } + dataset .withColumn( - s"${finisherBaseColName}_id", + idColName, element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) .getItem(LSH_ID_COL_NAME) ) - .withColumn(s"${finisherBaseColName}_neighbors", + .withColumn( + neighborsColName, element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) .getItem(LSH_NEIGHBORS_COL_NAME) ) diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 4c3c8fe913ac99..c20c3be0193b9d 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -24,8 +24,11 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { "First document, this is my first sentence. This is my second sentence.", "Second document, this is my first sentence. 
This is my second sentence.", "Third document, climate change is arguably one of the most pressing problems of our time.", - "Fourth document, Florence in Italy, is among the most beautiful cities in Europe.", - "Fifth document, The French Riviera is the Mediterranean coastline of the southeast corner of France.", + "Fourth document, climate change is definitely one of the most pressing problems of our time.", + "Fifth document, Florence in Italy, is among the most beautiful cities in Europe.", + "Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.", + "Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.", + "Eighth document, the warmest place in France is the French Riviera coast in Southern France.", ).map(Tuple1(_))) .toDF("text") @@ -65,7 +68,12 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() .setInputCols("doc_similarity_rankings") - .setOutputCols("finished_doc_similarity_rankings") + .setOutputCols( + Array( + "finished_doc_similarity_rankings_id", + "finished_doc_similarity_rankings_neighbors" + ) + ) val pipeline = new Pipeline() .setStages( @@ -83,9 +91,9 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) pipelineDF.select( - "finished_doc_similarity_ranker_id", + "finished_doc_similarity_rankings_id", "text", - "finished_doc_similarity_ranker_neighbors" + "finished_doc_similarity_rankings_neighbors" ).show(false) } } \ No newline at end of file From da50d61e2c73efa1cfacda251bbf85abb5c917e7 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 11 Mar 2023 22:09:43 +0100 Subject: [PATCH 14/26] Added distance param to show in rankings --- .../DocumentSimilarityRankerApproach.scala | 69 ++++++++++++++----- .../DocumentSimilarityRankerModel.scala | 10 +-- .../DocumentSimilarityRankerTestSpec.scala | 16 
++--- 3 files changed, 62 insertions(+), 33 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 1b2279c30330e7..f39894041d5af3 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -4,17 +4,28 @@ import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMB import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel -import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel, VectorAssembler} +import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel} import org.apache.spark.ml.functions.array_to_vector -import org.apache.spark.ml.linalg.{DenseVector, Vectors} -import org.apache.spark.ml.param.Param +import org.apache.spark.ml.linalg.DenseVector +import org.apache.spark.ml.param.{BooleanParam, Param} import org.apache.spark.ml.util.Identifiable -import org.apache.spark.sql.expressions.Window -import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} -import org.apache.spark.sql.functions.{col, expr, flatten, hash, monotonically_increasing_id, row_number, udf} +import org.apache.spark.sql.functions.{col, flatten, udf} +import org.apache.spark.sql.{DataFrame, Dataset} import scala.util.hashing.MurmurHash3 + +sealed trait NeighborAnnotation { + def neighbors: Array[_] +} + +case class IndexedNeighbors(neighbors: Array[Int]) extends NeighborAnnotation + +case class IndexedNeighborsWithDistance(neighbors: Array[(Int, Double)]) extends NeighborAnnotation + +case class NeighborsResultSet(result: (Int, NeighborAnnotation)) + + class 
DocumentSimilarityRankerApproach(override val uid: String) extends AnnotatorApproach[DocumentSimilarityRankerModel] with HasStorageRef @@ -37,6 +48,8 @@ class DocumentSimilarityRankerApproach(override val uid: String) val INDEX_COL_NAME = "index" + val DISTANCE = "distCol" + val INPUT_EMBEDDINGS = "sentence_embeddings.embeddings" val TEXT = "text" @@ -91,7 +104,14 @@ class DocumentSimilarityRankerApproach(override val uid: String) def setNumHashTables(value: Int): this.type = set(numHashTables, value) - def getNumHashTables: Int = $(numHashTables) + val visibleDistances = new BooleanParam( + this, + "setVisibleDistances", + "Whether to set visibleDistances in LSH output (Default: `false`)") + + def setVisibleDistances(value: Boolean): this.type = set(visibleDistances, value) + + def getVisibleDistances: Boolean = $(visibleDistances) setDefault( inputCols -> Array(SENTENCE_EMBEDDINGS), @@ -99,21 +119,32 @@ class DocumentSimilarityRankerApproach(override val uid: String) similarityMethod -> "brp", numberOfNeighbours -> 10, bucketLength -> 2.0, - numHashTables -> 3 + numHashTables -> 3, + visibleDistances -> false ) - def getNeighboursRankedDocIndexes(model: BucketedRandomProjectionLSHModel, - query: (Int, DenseVector), - similarityDataset: DataFrame) = { + def getNeighborsResultSet(model: BucketedRandomProjectionLSHModel, + query: (Int, DenseVector), + similarityDataset: DataFrame): NeighborsResultSet = { query match { case (index, queryVector) => val similarRankedDocs = model.approxNearestNeighbors(similarityDataset, queryVector, getNumberOfNeighbours) - val neighboursRankedDocIndexes: Array[Int] = similarRankedDocs - .select(INDEX_COL_NAME) - .collect() - .map(_.getInt(0)) - (index, neighboursRankedDocIndexes) + if(getVisibleDistances) { + val rankedNeighboursWithDistances = similarRankedDocs + .select(INDEX_COL_NAME, DISTANCE) + .collect() + .map(row => (row.getInt(0), row.getDouble(1))) + + NeighborsResultSet((index, 
IndexedNeighborsWithDistance(rankedNeighboursWithDistances))) + } else { + val rankedNeighbours = similarRankedDocs + .select(INDEX_COL_NAME) + .collect() + .map(_.getInt(0)) + + NeighborsResultSet(index, IndexedNeighbors(rankedNeighbours)) + } } } @@ -148,9 +179,9 @@ class DocumentSimilarityRankerApproach(override val uid: String) .map(x => (x.getAs[Int](INDEX_COL_NAME), x.getAs[DenseVector](LSH_INPUT_COL_NAME))) .collect() - val similarityMappings: Map[Int, Array[Int]] = indexedVectorTuples - .map(query => getNeighboursRankedDocIndexes(model, query, similarityDatasetWithIndex)) - .toMap + val similarityMappings: Map[Int, NeighborAnnotation] = indexedVectorTuples + .map(query => getNeighborsResultSet(model, query, similarityDatasetWithIndex)) + .map(_.result).toMap new DocumentSimilarityRankerModel() .setSimilarityMappings( diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index 522a1b2077af61..8d5aa7a761ff29 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -27,12 +27,12 @@ class DocumentSimilarityRankerModel(override val uid: String) * * @group param */ - val similarityMappings: MapFeature[String, Map[Int, Array[Int]]] = new MapFeature(this, "similarityMappings") + val similarityMappings: MapFeature[String, Map[Int, NeighborAnnotation]] = new MapFeature(this, "similarityMappings") /** @group setParam */ - def setSimilarityMappings(value: Map[String, Map[Int, Array[Int]]]): this.type = set(similarityMappings, value) + def setSimilarityMappings(value: Map[String, Map[Int, NeighborAnnotation]]): this.type = set(similarityMappings, value) - def getSimilarityMappings: Map[Int, Array[Int]] = $$(similarityMappings).getOrElse("similarityMappings", Map.empty) + 
def getSimilarityMappings: Map[Int, NeighborAnnotation] = $$(similarityMappings).getOrElse("similarityMappings", Map.empty) setDefault( inputCols -> Array(SENTENCE_EMBEDDINGS), @@ -54,7 +54,7 @@ class DocumentSimilarityRankerModel(override val uid: String) annotation => { val inputResult = annotation.result val targetIndex = MurmurHash3.stringHash(inputResult, MurmurHash3.stringSeed) - val neighbors: Array[Int] = getSimilarityMappings.getOrElse(targetIndex, Array(-1)) // index NA + val neighborsAnnotation: NeighborAnnotation = getSimilarityMappings.getOrElse(targetIndex, IndexedNeighbors(Array.empty)) // index NA Annotation( annotatorType = outputAnnotatorType, @@ -63,7 +63,7 @@ class DocumentSimilarityRankerModel(override val uid: String) result = annotation.result, metadata = annotation.metadata + ("lshId"-> targetIndex.toString) - + ("lshNeighbors" -> neighbors.mkString("[", ",", "]")) , + + ("lshNeighbors" -> neighborsAnnotation.neighbors.mkString("[", ",", "]")) , embeddings = annotation.embeddings) } ) diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index c20c3be0193b9d..94df2e0eec8a77 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -65,15 +65,10 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setOutputCol(DOC_SIMILARITY_RANKINGS) .setSimilarityMethod("brp") .setNumberOfNeighbours(3) + .setVisibleDistances(true) val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() .setInputCols("doc_similarity_rankings") - .setOutputCols( - Array( - "finished_doc_similarity_rankings_id", - "finished_doc_similarity_rankings_neighbors" - ) - ) val pipeline = new Pipeline() .setStages( @@ -90,10 +85,13 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec 
{ val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) - pipelineDF.select( - "finished_doc_similarity_rankings_id", + val fitted = pipelineDF.select( "text", + "finished_doc_similarity_rankings_id", "finished_doc_similarity_rankings_neighbors" - ).show(false) + ) + + fitted.show(false) + fitted.printSchema } } \ No newline at end of file From 5c924fcf7f6a3313aefd6f7b65c5a1592ea7ee11 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Mon, 13 Mar 2023 22:59:34 +0100 Subject: [PATCH 15/26] Added logic to select nearest neighbor --- .../DocumentSimilarityRankerFinisher.scala | 51 +++++++++++++++++-- .../DocumentSimilarityRankerTestSpec.scala | 21 +++++--- 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala index 80d93440b81b92..35ffa546cd4483 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -3,12 +3,13 @@ package com.johnsnowlabs.nlp.finisher import com.johnsnowlabs.nlp.AnnotatorType import com.johnsnowlabs.nlp.util.FinisherUtil import org.apache.spark.ml.Transformer -import org.apache.spark.ml.param.{ParamMap, StringArrayParam} +import org.apache.spark.ml.param.{BooleanParam, ParamMap, StringArrayParam} import org.apache.spark.ml.util.{DefaultParamsWritable, Identifiable} -import org.apache.spark.sql.functions.{col, element_at} -import org.apache.spark.sql.types.{ArrayType, FloatType, StructField, StructType} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.sql.{DataFrame, Dataset} + case class DocumentSimilarityRankerFinisher (override val uid: String) extends Transformer with DefaultParamsWritable { @@ -73,10 +74,29 @@ case class 
DocumentSimilarityRankerFinisher (override val uid: String) */ def getOutputCols: Array[String] = get(outputCols).getOrElse(getInputCols.map("finished_" + _)) + val extractNearestNeighbor: BooleanParam = + new BooleanParam(this, "extractNearestNeighbor", doc = "Extract the best neighbors with distance") + + /** Set flag to extract best neighbor with distance + * + * @group setParam + */ + def setExtractNearestNeighbor(value: Boolean): this.type = set(extractNearestNeighbor, value) + + /** Name of input annotation cols containing embeddings + * + * @group getParam + */ + def getExtractNearestNeighbor: Boolean = $(extractNearestNeighbor) + + setDefault( + extractNearestNeighbor -> false + ) + override def transform(dataset: Dataset[_]): DataFrame = { require(getOutputCols.length == 1 || getOutputCols.length == 2, - "Output column array should have length 1 (default case) or 2 when value id and neighbors are assigned.") + "Output column array should have length 1 (default case) or 2 when value id and neighbors are assigned.") val (idColName, neighborsColName) = getOutputCols.length match { @@ -84,17 +104,38 @@ case class DocumentSimilarityRankerFinisher (override val uid: String) case 2 => (getOutputCols(0), getOutputCols(1)) } - dataset + val transformed = dataset .withColumn( idColName, element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) .getItem(LSH_ID_COL_NAME) + .cast("int") ) .withColumn( neighborsColName, element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) .getItem(LSH_NEIGHBORS_COL_NAME) ) + + val formatted = transformed + .withColumn(s"no_squared_$neighborsColName", regexp_replace(col(neighborsColName), "[\\[\\]]", "")) + .withColumn(s"tuple_extract_$neighborsColName", regexp_extract(col(s"no_squared_$neighborsColName"), "\\((.*?)\\)", 0)) + .withColumn(s"no_rounded_$neighborsColName", regexp_replace(col(s"tuple_extract_$neighborsColName"), "[\\(\\)]", "")) + + val result = + if(getExtractNearestNeighbor) + formatted 
+ .withColumn(s"split_$neighborsColName", split(col(s"no_rounded_$neighborsColName"), ",")) + .withColumn("nearest_neighbor_id", element_at(col(s"split_$neighborsColName"), 1).cast(IntegerType)) + .withColumn("nearest_neighbor_distance", element_at(col(s"split_$neighborsColName"), 2)) + else + formatted + + result.drop( + s"no_squared_$neighborsColName", + s"tuple_extract_$neighborsColName", + s"no_rounded_$neighborsColName", + s"split_$neighborsColName") } override def copy(extra: ParamMap): Transformer = defaultCopy(extra) diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 94df2e0eec8a77..46c9bd1916895b 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -12,6 +12,7 @@ import com.johnsnowlabs.nlp.{AnnotatorBuilder, EmbeddingsFinisher} import com.johnsnowlabs.tags.SlowTest import org.apache.spark.ml.Pipeline import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions.col import org.scalatest.flatspec.AnyFlatSpec class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { @@ -66,9 +67,14 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setSimilarityMethod("brp") .setNumberOfNeighbours(3) .setVisibleDistances(true) + // .setIncludeQueryInResult(true) // TODO useful for debug val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() .setInputCols("doc_similarity_rankings") + .setOutputCols( + "finished_doc_similarity_rankings_id", + "finished_doc_similarity_rankings_neighbors") + .setExtractNearestNeighbor(true) val pipeline = new Pipeline() .setStages( @@ -83,15 +89,14 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { documentSimilarityFinisher )) - val pipelineDF = pipeline.fit(smallCorpus).transform(smallCorpus) + val 
transformed = pipeline.fit(smallCorpus).transform(smallCorpus) - val fitted = pipelineDF.select( - "text", - "finished_doc_similarity_rankings_id", - "finished_doc_similarity_rankings_neighbors" - ) + transformed.printSchema + transformed + .select("text", "finished_doc_similarity_rankings_id", "nearest_neighbor_id", "nearest_neighbor_distance") + .show(false) - fitted.show(false) - fitted.printSchema + // correct if not empty as inclusive query points are at distance 0.0 from themselves + assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) } } \ No newline at end of file From 2d3451d7989397d693d01c5af34c7bb54f0c3bf8 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Wed, 15 Mar 2023 22:03:16 +0100 Subject: [PATCH 16/26] Added identity ranking for debugging --- .../DocumentSimilarityRankerApproach.scala | 25 ++++++++++++++++--- .../DocumentSimilarityRankerTestSpec.scala | 2 +- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index f39894041d5af3..211ba768a95e35 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -106,13 +106,23 @@ class DocumentSimilarityRankerApproach(override val uid: String) val visibleDistances = new BooleanParam( this, - "setVisibleDistances", - "Whether to set visibleDistances in LSH output (Default: `false`)") + "visibleDistances", + "Whether to set visibleDistances in ranking output (Default: `false`)") def setVisibleDistances(value: Boolean): this.type = set(visibleDistances, value) def getVisibleDistances: Boolean = $(visibleDistances) + val identityRanking = new BooleanParam( + this, + "identityRanking", + "Whether to include 
identity in ranking result set. Useful for debug. (Default: `false`)") + + def setIdentityRanking(value: Boolean): this.type = set(identityRanking, value) + + def getIdentityRanking: Boolean = $(identityRanking) + + setDefault( inputCols -> Array(SENTENCE_EMBEDDINGS), outputCol -> DOC_SIMILARITY_RANKINGS, @@ -120,7 +130,8 @@ class DocumentSimilarityRankerApproach(override val uid: String) numberOfNeighbours -> 10, bucketLength -> 2.0, numHashTables -> 3, - visibleDistances -> false + visibleDistances -> false, + identityRanking -> false ) def getNeighborsResultSet(model: BucketedRandomProjectionLSHModel, @@ -128,7 +139,13 @@ class DocumentSimilarityRankerApproach(override val uid: String) similarityDataset: DataFrame): NeighborsResultSet = { query match { case (index, queryVector) => - val similarRankedDocs = model.approxNearestNeighbors(similarityDataset, queryVector, getNumberOfNeighbours) + val _similarityDataset = + if (getIdentityRanking) { + similarityDataset + } else { + similarityDataset.where(col("index") =!= index) + } + val similarRankedDocs = model.approxNearestNeighbors(_similarityDataset, queryVector, getNumberOfNeighbours) if(getVisibleDistances) { val rankedNeighboursWithDistances = similarRankedDocs diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 46c9bd1916895b..6bd04a9c2b6880 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -67,7 +67,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setSimilarityMethod("brp") .setNumberOfNeighbours(3) .setVisibleDistances(true) - // .setIncludeQueryInResult(true) // TODO useful for debug + .setIdentityRanking(true) val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() 
.setInputCols("doc_similarity_rankings") From 632b3ca086d314f2d13e07c5a072477a88d0f39f Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 18 Mar 2023 21:55:32 +0100 Subject: [PATCH 17/26] Adding Python interface to doc sim ranker approach and model --- .../sparknlp/annotator/similarity/__init__.py | 0 .../similarity/document_similarity_ranker.py | 150 ++++++++++++++++++ python/sparknlp/common/annotator_type.py | 1 + .../DocumentSimilarityRankerApproach.scala | 2 +- .../DocumentSimilarityRankerTestSpec.scala | 10 +- 5 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 python/sparknlp/annotator/similarity/__init__.py create mode 100644 python/sparknlp/annotator/similarity/document_similarity_ranker.py diff --git a/python/sparknlp/annotator/similarity/__init__.py b/python/sparknlp/annotator/similarity/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/python/sparknlp/annotator/similarity/document_similarity_ranker.py b/python/sparknlp/annotator/similarity/document_similarity_ranker.py new file mode 100644 index 00000000000000..6ad255c8e75dff --- /dev/null +++ b/python/sparknlp/annotator/similarity/document_similarity_ranker.py @@ -0,0 +1,150 @@ +# Copyright 2017-2023 John Snow Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Contains classes for DocumentSimilarityRanker.""" + +from sparknlp.common import * + + +class DocumentSimilarityRankerApproach(AnnotatorApproach, HasStorageRef, HasEnableCachingProperties): + inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS] + + outputAnnotatorType = AnnotatorType.DOC_SIMILARITY_RANKINGS + + similarityMethod = Param(Params._dummy(), + "similarityMethod", + "The similarity method used to calculate the neighbours. (Default: 'brp', " + "Bucketed Random Projection for Euclidean Distance)", + typeConverter=TypeConverters.toString()) + + numberOfNeighbours = Param(Params._dummy(), + "numberOfNeighbours", + "The number of neighbours the model will return (Default:`10`)", + typeConverter=TypeConverters.toInt()) + + bucketLength = Param(Params._dummy(), + "bucketLength", + "The bucket length that controls the average size of hash buckets. " + "A larger bucket length (i.e., fewer buckets) increases the probability of features " + "being hashed to the same bucket (increasing the numbers of true and false positives).", + typeConverter=TypeConverters.toFloat()) + + numHashTables = Param(Params._dummy(), + "numHashTables", + "number of hash tables, where increasing number of hash tables lowers the " + "false negative rate,and decreasing it improves the running performance.", + typeConverter=TypeConverters.toInt()) + + visibleDistances = Param(Params._dummy(), + "numHashTables", + "Whether to set visibleDistances in ranking output (Default: `false`).", + typeConverter=TypeConverters.toBoolean()) + + identityRanking = Param(Params._dummy(), + "numHashTables", + "Whether to include identity in ranking result set. Useful for debug. (Default: `false`).", + typeConverter=TypeConverters.toBoolean()) + + def setSimilarityMethod(self, value): + """Sets the similarity method used to calculate the neighbours. 
+ (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) + + Parameters + ---------- + value : str + the similarity method to calculate the neighbours. + """ + return self._set(similarityMethod=value) + + def numberOfNeighbours(self, value): + """Sets The number of neighbours the model will return for each document(Default:`"10"`). + + Parameters + ---------- + value : str + the number of neighbours the model will return for each document. + """ + return self._set(numberOfNeighbours=value) + + def bucketLength(self, value): + """Sets the bucket length that controls the average size of hash buckets (Default:`"2.0"`). + + Parameters + ---------- + value : float + Sets the bucket length that controls the average size of hash buckets. + """ + return self._set(bucketLength=value) + + def numHashTables(self, value): + """Sets the number of hash tables. + + Parameters + ---------- + value : int + Sets the number of hash tables. + """ + return self._set(numHashTables=value) + + def visibleDistances(self, value): + """Sets the document distances visible in the result set. + + Parameters + ---------- + value : bool + Sets the document distances visible in the result set. + Default('False') + """ + return self._set(visibleDistances=value) + + def visibleDistances(self, value): + """Sets the document identity ranking inclusive in the result set. + + Parameters + ---------- + value : bool + Sets the document identity ranking inclusive in the result set. + Useful for debugging. + Default('False'). 
+ """ + return self._set(identityRanking=value) + + @keyword_only + def __init__(self): + super(DocumentSimilarityRankerApproach, self).__init__(classname="com.johnsnowlabs.nlp.annotators.similarity" + ".DocumentSimilarityRankerApproach") + self._setDefault( + similarityMethod="brp", + numberOfNeighbours=10, + bucketLength=2.0, + numHashTables=3, + visibleDistances=False, + identityRanking=False + ) + + def _create_model(self, java_model): + return DocumentSimilarityRankerModel(java_model=java_model) + + +class DocumentSimilarityRankerModel(AnnotatorModel, HasStorageRef, HasEmbeddingsProperties): + + name = "DocumentSimilarityRankerModel" + inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS] + outputAnnotatorType = AnnotatorType.DOC_SIMILARITY_RANKINGS + + def __init__(self, classname="com.johnsnowlabs.nlp.annotators.similarity" + ".DocumentSimilarityRankerModel", java_model=None): + super(DocumentSimilarityRankerModel, self).__init__( + classname=classname, + java_model=java_model + ) \ No newline at end of file diff --git a/python/sparknlp/common/annotator_type.py b/python/sparknlp/common/annotator_type.py index 2d0eb1ed54c9e8..0cd230a5ec480d 100644 --- a/python/sparknlp/common/annotator_type.py +++ b/python/sparknlp/common/annotator_type.py @@ -35,3 +35,4 @@ class AnnotatorType(object): NODE = "node" TABLE = "table" DUMMY = "dummy" + DOC_SIMILARITY_RANKINGS = "doc_similarity_rankings" diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 211ba768a95e35..407bbef4c031fe 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -77,7 +77,7 @@ class DocumentSimilarityRankerApproach(override val uid: String) val numberOfNeighbours = new Param[Int]( 
this, "numberOfNeighbours", - """The number of neighbours the model will return (Default:`"10"`)""") + """The number of neighbours the model will return for each document (Default:`"10"`)""") def setNumberOfNeighbours(value: Int): this.type = set(numberOfNeighbours, value) diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 6bd04a9c2b6880..784a2cde3077ee 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -23,7 +23,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val smallCorpus = spark.createDataFrame( List( "First document, this is my first sentence. This is my second sentence.", - "Second document, this is my first sentence. This is my second sentence.", + "Second document, this is my second sentence. 
This is my second sentence.", "Third document, climate change is arguably one of the most pressing problems of our time.", "Fourth document, climate change is definitely one of the most pressing problems of our time.", "Fifth document, Florence in Italy, is among the most beautiful cities in Europe.", @@ -67,7 +67,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setSimilarityMethod("brp") .setNumberOfNeighbours(3) .setVisibleDistances(true) - .setIdentityRanking(true) + .setIdentityRanking(false) val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() .setInputCols("doc_similarity_rankings") @@ -93,7 +93,11 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { transformed.printSchema transformed - .select("text", "finished_doc_similarity_rankings_id", "nearest_neighbor_id", "nearest_neighbor_distance") + .select("text", + "finished_sentence_embeddings", + "finished_doc_similarity_rankings_id", + "nearest_neighbor_id", + "nearest_neighbor_distance") .show(false) // correct if not empty as inclusive query points are at distance 0.0 from themselves From abc3ff4a4d544a6db0f76d22741ecadc3d3dbe93 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 25 Mar 2023 21:16:43 +0100 Subject: [PATCH 18/26] WIP - Python interface --- .../similarity/document_similarity_ranker.py | 40 +++--- python/test/annotator/similarity/__init__.py | 0 .../similarity/doc_similarity_ranker_test.py | 92 +++++++++++++ .../DocumentSimilarityRankerApproach.scala | 77 +++++------ .../DocumentSimilarityRankerModel.scala | 79 ++++++------ .../DocumentSimilarityRankerFinisher.scala | 121 ++++++++++-------- .../DocumentSimilarityRankerTestSpec.scala | 31 ++--- 7 files changed, 278 insertions(+), 162 deletions(-) create mode 100644 python/test/annotator/similarity/__init__.py create mode 100644 python/test/annotator/similarity/doc_similarity_ranker_test.py diff --git a/python/sparknlp/annotator/similarity/document_similarity_ranker.py 
b/python/sparknlp/annotator/similarity/document_similarity_ranker.py index 6ad255c8e75dff..75fe114bff4ede 100644 --- a/python/sparknlp/annotator/similarity/document_similarity_ranker.py +++ b/python/sparknlp/annotator/similarity/document_similarity_ranker.py @@ -25,35 +25,35 @@ class DocumentSimilarityRankerApproach(AnnotatorApproach, HasStorageRef, HasEnab "similarityMethod", "The similarity method used to calculate the neighbours. (Default: 'brp', " "Bucketed Random Projection for Euclidean Distance)", - typeConverter=TypeConverters.toString()) + typeConverter=TypeConverters.toString) numberOfNeighbours = Param(Params._dummy(), "numberOfNeighbours", "The number of neighbours the model will return (Default:`10`)", - typeConverter=TypeConverters.toInt()) + typeConverter=TypeConverters.toInt) bucketLength = Param(Params._dummy(), "bucketLength", "The bucket length that controls the average size of hash buckets. " "A larger bucket length (i.e., fewer buckets) increases the probability of features " "being hashed to the same bucket (increasing the numbers of true and false positives).", - typeConverter=TypeConverters.toFloat()) + typeConverter=TypeConverters.toFloat) numHashTables = Param(Params._dummy(), "numHashTables", "number of hash tables, where increasing number of hash tables lowers the " "false negative rate,and decreasing it improves the running performance.", - typeConverter=TypeConverters.toInt()) + typeConverter=TypeConverters.toInt) visibleDistances = Param(Params._dummy(), - "numHashTables", + "visibleDistances", "Whether to set visibleDistances in ranking output (Default: `false`).", - typeConverter=TypeConverters.toBoolean()) + typeConverter=TypeConverters.toBoolean) identityRanking = Param(Params._dummy(), - "numHashTables", + "identityRanking", "Whether to include identity in ranking result set. Useful for debug. 
(Default: `false`).", - typeConverter=TypeConverters.toBoolean()) + typeConverter=TypeConverters.toBoolean) def setSimilarityMethod(self, value): """Sets the similarity method used to calculate the neighbours. @@ -66,7 +66,7 @@ def setSimilarityMethod(self, value): """ return self._set(similarityMethod=value) - def numberOfNeighbours(self, value): + def setNumberOfNeighbours(self, value): """Sets The number of neighbours the model will return for each document(Default:`"10"`). Parameters @@ -76,7 +76,7 @@ def numberOfNeighbours(self, value): """ return self._set(numberOfNeighbours=value) - def bucketLength(self, value): + def setBucketLength(self, value): """Sets the bucket length that controls the average size of hash buckets (Default:`"2.0"`). Parameters @@ -86,7 +86,7 @@ def bucketLength(self, value): """ return self._set(bucketLength=value) - def numHashTables(self, value): + def setNumHashTables(self, value): """Sets the number of hash tables. Parameters @@ -96,7 +96,7 @@ def numHashTables(self, value): """ return self._set(numHashTables=value) - def visibleDistances(self, value): + def setVisibleDistances(self, value): """Sets the document distances visible in the result set. Parameters @@ -107,7 +107,7 @@ def visibleDistances(self, value): """ return self._set(visibleDistances=value) - def visibleDistances(self, value): + def setIdentityRanking(self, value): """Sets the document identity ranking inclusive in the result set. 
Parameters @@ -121,8 +121,8 @@ def visibleDistances(self, value): @keyword_only def __init__(self): - super(DocumentSimilarityRankerApproach, self).__init__(classname="com.johnsnowlabs.nlp.annotators.similarity" - ".DocumentSimilarityRankerApproach") + super(DocumentSimilarityRankerApproach, self)\ + .__init__(classname="com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRankerApproach") self._setDefault( similarityMethod="brp", numberOfNeighbours=10, @@ -142,9 +142,13 @@ class DocumentSimilarityRankerModel(AnnotatorModel, HasStorageRef, HasEmbeddings inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS] outputAnnotatorType = AnnotatorType.DOC_SIMILARITY_RANKINGS - def __init__(self, classname="com.johnsnowlabs.nlp.annotators.similarity" - ".DocumentSimilarityRankerModel", java_model=None): + def __init__(self, classname="com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRankerModel", + java_model=None): super(DocumentSimilarityRankerModel, self).__init__( classname=classname, java_model=java_model - ) \ No newline at end of file + ) + + +class DocumentSimilarityRankerFinisher: + pass diff --git a/python/test/annotator/similarity/__init__.py b/python/test/annotator/similarity/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/python/test/annotator/similarity/doc_similarity_ranker_test.py b/python/test/annotator/similarity/doc_similarity_ranker_test.py new file mode 100644 index 00000000000000..742ff7e71f2b6f --- /dev/null +++ b/python/test/annotator/similarity/doc_similarity_ranker_test.py @@ -0,0 +1,92 @@ +# Copyright 2017-2022 John Snow Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest + +import pytest + +from sparknlp.annotator import * +from sparknlp.annotator.similarity.document_similarity_ranker import DocumentSimilarityRankerApproach +from sparknlp.base import * +from test.util import SparkSessionForTest + + +@pytest.mark.fast +class DocumentSimilarityRankerTestSpec(unittest.TestCase): + + def setUp(self): + self.data = SparkSessionForTest.spark.createDataFrame([ + ["First document, this is my first sentence. This is my second sentence."], + ["Second document, this is my second sentence. This is my second sentence."], + ["Third document, climate change is arguably one of the most pressing problems of our time."], + ["Fourth document, climate change is definitely one of the most pressing problems of our time."], + ["Fifth document, Florence in Italy, is among the most beautiful cities in Europe."], + ["Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France."], + ["Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France."], + ["Eighth document, the warmest place in France is the French Riviera coast in Southern France."] + ]).toDF("text") + + def runTest(self): + document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + sentence_detector = SentenceDetector() \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + glove = WordEmbeddingsModel.pretrained() \ + .setInputCols(["sentence", "token"]) \ + 
.setOutputCol("embeddings") + sentence_embeddings = SentenceEmbeddings() \ + .setInputCols(["sentence", "embeddings"]) \ + .setOutputCol("sentence_embeddings") \ + .setPoolingStrategy("AVERAGE") + + document_similarity_ranker = DocumentSimilarityRankerApproach() \ + .setInputCols("sentence_embeddings") \ + .setOutputCol("doc_similarity_rankings") \ + .setSimilarityMethod("brp") \ + .setNumberOfNeighbours(10) \ + .setBucketLength(2.0) \ + .setNumHashTables(3) \ + .setVisibleDistances(True) \ + .setIdentityRanking(False) + + print(document_similarity_ranker.__dict__) + + # documentSimilarityFinisher = ( + # DocumentSimilarityRankerFinisher() + # .setInputCols("doc_similarity_rankings") + # .setOutputCols( + # "finished_doc_similarity_rankings_id", + # "finished_doc_similarity_rankings_neighbors") + # .setExtractNearestNeighbor(True) + # ) + + pipeline = Pipeline(stages=[ + document_assembler, + sentence_detector, + tokenizer, + glove, + sentence_embeddings, + document_similarity_ranker + # documentSimilarityFinisher + ]) + + model = pipeline.fit(self.data) + model.write().overwrite().save("./tmp_model") + loaded_model = model.load("./tmp_model") + loaded_model.transform(self.data).show() + diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 407bbef4c031fe..803c9b2c72fc13 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -14,28 +14,27 @@ import org.apache.spark.sql.{DataFrame, Dataset} import scala.util.hashing.MurmurHash3 - sealed trait NeighborAnnotation { def neighbors: Array[_] } case class IndexedNeighbors(neighbors: Array[Int]) extends NeighborAnnotation -case class IndexedNeighborsWithDistance(neighbors: Array[(Int, Double)]) extends 
NeighborAnnotation +case class IndexedNeighborsWithDistance(neighbors: Array[(Int, Double)]) + extends NeighborAnnotation case class NeighborsResultSet(result: (Int, NeighborAnnotation)) - class DocumentSimilarityRankerApproach(override val uid: String) - extends AnnotatorApproach[DocumentSimilarityRankerModel] + extends AnnotatorApproach[DocumentSimilarityRankerModel] with HasStorageRef with HasEnableCachingProperties { override val description: AnnotatorType = "LSH based document similarity annotator" /** Annotator reference id. Used to identify elements in metadata or to refer to this annotator - * type - */ + * type + */ def this() = this(Identifiable.randomUID("DocumentSimilarityRankerApproach")) override val inputAnnotatorTypes: Array[AnnotatorType] = Array(SENTENCE_EMBEDDINGS) @@ -54,11 +53,11 @@ class DocumentSimilarityRankerApproach(override val uid: String) val TEXT = "text" - /** The similarity method used to calculate the neighbours. - * (Default: `"brp"`, Bucketed Random Projection for Euclidean Distance) - * - * @group param - */ + /** The similarity method used to calculate the neighbours. (Default: `"brp"`, Bucketed Random + * Projection for Euclidean Distance) + * + * @group param + */ val similarityMethod = new Param[String]( this, "similarityMethod", @@ -71,9 +70,9 @@ class DocumentSimilarityRankerApproach(override val uid: String) def getSimilarityMethod: String = $(similarityMethod) /** The number of neighbours the model will return (Default:`"10"`). 
- * - * @group param - */ + * + * @group param + */ val numberOfNeighbours = new Param[Int]( this, "numberOfNeighbours", @@ -122,21 +121,18 @@ class DocumentSimilarityRankerApproach(override val uid: String) def getIdentityRanking: Boolean = $(identityRanking) - setDefault( - inputCols -> Array(SENTENCE_EMBEDDINGS), - outputCol -> DOC_SIMILARITY_RANKINGS, similarityMethod -> "brp", numberOfNeighbours -> 10, bucketLength -> 2.0, numHashTables -> 3, visibleDistances -> false, - identityRanking -> false - ) + identityRanking -> false) - def getNeighborsResultSet(model: BucketedRandomProjectionLSHModel, - query: (Int, DenseVector), - similarityDataset: DataFrame): NeighborsResultSet = { + def getNeighborsResultSet( + model: BucketedRandomProjectionLSHModel, + query: (Int, DenseVector), + similarityDataset: DataFrame): NeighborsResultSet = { query match { case (index, queryVector) => val _similarityDataset = @@ -145,9 +141,10 @@ class DocumentSimilarityRankerApproach(override val uid: String) } else { similarityDataset.where(col("index") =!= index) } - val similarRankedDocs = model.approxNearestNeighbors(_similarityDataset, queryVector, getNumberOfNeighbours) + val similarRankedDocs = + model.approxNearestNeighbors(_similarityDataset, queryVector, getNumberOfNeighbours) - if(getVisibleDistances) { + if (getVisibleDistances) { val rankedNeighboursWithDistances = similarRankedDocs .select(INDEX_COL_NAME, DISTANCE) .collect() @@ -165,15 +162,19 @@ class DocumentSimilarityRankerApproach(override val uid: String) } } - override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = { + override def train( + dataset: Dataset[_], + recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = { val lsh = $(similarityMethod) match { - case "brp" => new BucketedRandomProjectionLSH() - .setBucketLength($(bucketLength)) - .setNumHashTables($(numHashTables)) - .setInputCol(LSH_INPUT_COL_NAME) - 
.setOutputCol(LSH_OUTPUT_COL_NAME) - case _ => throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") + case "brp" => + new BucketedRandomProjectionLSH() + .setBucketLength($(bucketLength)) + .setNumHashTables($(numHashTables)) + .setInputCol(LSH_INPUT_COL_NAME) + .setOutputCol(LSH_OUTPUT_COL_NAME) + case _ => + throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") } val embeddingsDataset = dataset.withColumn(LSH_INPUT_COL_NAME, col(INPUT_EMBEDDINGS)) @@ -184,11 +185,12 @@ class DocumentSimilarityRankerApproach(override val uid: String) val model = lsh.fit(similarityDataset) - val mh3UDF = udf { - (s: String) => MurmurHash3.stringHash(s, MurmurHash3.stringSeed) + val mh3UDF = udf { (s: String) => + MurmurHash3.stringHash(s, MurmurHash3.stringSeed) } - val similarityDatasetWithIndex = similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col(TEXT))) + val similarityDatasetWithIndex = + similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col(TEXT))) val indexedVectorTuples = similarityDatasetWithIndex .select(INDEX_COL_NAME, LSH_INPUT_COL_NAME) @@ -198,11 +200,10 @@ class DocumentSimilarityRankerApproach(override val uid: String) val similarityMappings: Map[Int, NeighborAnnotation] = indexedVectorTuples .map(query => getNeighborsResultSet(model, query, similarityDatasetWithIndex)) - .map(_.result).toMap + .map(_.result) + .toMap new DocumentSimilarityRankerModel() - .setSimilarityMappings( - Map("similarityMappings" -> similarityMappings) - ) + .setSimilarityMappings(Map("similarityMappings" -> similarityMappings)) } } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index 8d5aa7a761ff29..d90d9faf177f6f 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ 
b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -3,7 +3,12 @@ package com.johnsnowlabs.nlp.annotators.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} import com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties import com.johnsnowlabs.nlp.serialization.MapFeature -import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate, ParamsAndFeaturesWritable} +import com.johnsnowlabs.nlp.{ + Annotation, + AnnotatorModel, + HasSimpleAnnotate, + ParamsAndFeaturesWritable +} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} import org.apache.spark.sql.functions.col @@ -11,7 +16,7 @@ import org.apache.spark.sql.functions.col import scala.util.hashing.MurmurHash3 class DocumentSimilarityRankerModel(override val uid: String) - extends AnnotatorModel[DocumentSimilarityRankerModel] + extends AnnotatorModel[DocumentSimilarityRankerModel] with HasSimpleAnnotate[DocumentSimilarityRankerModel] with HasStorageRef with HasEmbeddingsProperties @@ -24,49 +29,47 @@ class DocumentSimilarityRankerModel(override val uid: String) def this() = this(Identifiable.randomUID("DOC_SIMILARITY_RANKER")) /** Dictionary of words with their vectors - * - * @group param - */ - val similarityMappings: MapFeature[String, Map[Int, NeighborAnnotation]] = new MapFeature(this, "similarityMappings") + * + * @group param + */ + val similarityMappings: MapFeature[String, Map[Int, NeighborAnnotation]] = + new MapFeature(this, "similarityMappings") /** @group setParam */ - def setSimilarityMappings(value: Map[String, Map[Int, NeighborAnnotation]]): this.type = set(similarityMappings, value) + def setSimilarityMappings(value: Map[String, Map[Int, NeighborAnnotation]]): this.type = + set(similarityMappings, value) - def getSimilarityMappings: Map[Int, NeighborAnnotation] = $$(similarityMappings).getOrElse("similarityMappings", 
Map.empty) + def getSimilarityMappings: Map[Int, NeighborAnnotation] = + $$(similarityMappings).getOrElse("similarityMappings", Map.empty) - setDefault( - inputCols -> Array(SENTENCE_EMBEDDINGS), - outputCol -> DOC_SIMILARITY_RANKINGS - ) + setDefault(inputCols -> Array(SENTENCE_EMBEDDINGS), outputCol -> DOC_SIMILARITY_RANKINGS) /** takes a document and annotations and produces new annotations of this annotator's annotation - * type - * - * @param annotations - * Annotations that correspond to inputAnnotationCols generated by previous annotators if any - * @return - * any number of annotations processed for every input annotation. Not necessary one to one - * relationship - */ + * type + * + * @param annotations + * Annotations that correspond to inputAnnotationCols generated by previous annotators if any + * @return + * any number of annotations processed for every input annotation. Not necessary one to one + * relationship + */ override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = + annotations.map(annotation => { + val inputResult = annotation.result + val targetIndex = MurmurHash3.stringHash(inputResult, MurmurHash3.stringSeed) + val neighborsAnnotation: NeighborAnnotation = + getSimilarityMappings.getOrElse(targetIndex, IndexedNeighbors(Array.empty)) // index NA - annotations.map( - annotation => { - val inputResult = annotation.result - val targetIndex = MurmurHash3.stringHash(inputResult, MurmurHash3.stringSeed) - val neighborsAnnotation: NeighborAnnotation = getSimilarityMappings.getOrElse(targetIndex, IndexedNeighbors(Array.empty)) // index NA - - Annotation( - annotatorType = outputAnnotatorType, - begin = annotation.begin, - end = annotation.end, - result = annotation.result, - metadata = annotation.metadata - + ("lshId"-> targetIndex.toString) - + ("lshNeighbors" -> neighborsAnnotation.neighbors.mkString("[", ",", "]")) , - embeddings = annotation.embeddings) - } - ) + Annotation( + annotatorType = outputAnnotatorType, + begin = 
annotation.begin, + end = annotation.end, + result = annotation.result, + metadata = annotation.metadata + + ("lshId" -> targetIndex.toString) + + ("lshNeighbors" -> neighborsAnnotation.neighbors.mkString("[", ",", "]")), + embeddings = annotation.embeddings) + }) } -object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRankerModel] \ No newline at end of file +object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRankerModel] diff --git a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala index 35ffa546cd4483..a6eb5115a75a21 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -9,9 +9,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.sql.{DataFrame, Dataset} - -case class DocumentSimilarityRankerFinisher (override val uid: String) - extends Transformer +case class DocumentSimilarityRankerFinisher(override val uid: String) + extends Transformer with DefaultParamsWritable { def this() = this(Identifiable.randomUID("document_similarity_ranker_finisher")) @@ -25,77 +24,85 @@ case class DocumentSimilarityRankerFinisher (override val uid: String) val FINISHED_DOC_SIM_RANKER_NEIGHBORS_DEFAULT = "finished_doc_similarity_rankings_neighbors" /** Name of input annotation cols containing embeddings - * - * @group param - */ + * + * @group param + */ val inputCols: StringArrayParam = - new StringArrayParam(this, "inputCols", "Name of input annotation cols containing similar documents") + new StringArrayParam( + this, + "inputCols", + "Name of input annotation cols containing similar documents") /** Name of input annotation cols containing similar documents - * - * @group setParam - */ + * + * 
@group setParam + */ def setInputCols(value: Array[String]): this.type = set(inputCols, value) /** Name of input annotation cols containing similar documents - * - * @group setParam - */ + * + * @group setParam + */ def setInputCols(value: String*): this.type = setInputCols(value.toArray) /** Name of DocumentSimilarityRankerFinisher output cols - * - * @group getParam - */ + * + * @group getParam + */ def getInputCols: Array[String] = $(inputCols) /** Name of DocumentSimilarityRankerFinisher output cols - * - * @group param - */ + * + * @group param + */ val outputCols: StringArrayParam = - new StringArrayParam(this, "outputCols", "Name of DocumentSimilarityRankerFinisher output cols") + new StringArrayParam( + this, + "outputCols", + "Name of DocumentSimilarityRankerFinisher output cols") /** Name of DocumentSimilarityRankerFinisher output cols - * - * @group setParam - */ + * + * @group setParam + */ def setOutputCols(value: Array[String]): this.type = set(outputCols, value) /** Name of DocumentSimilarityRankerFinisher output cols - * - * @group setParam - */ + * + * @group setParam + */ def setOutputCols(value: String*): this.type = setOutputCols(value.toArray) /** Name of input annotation cols containing embeddings - * - * @group getParam - */ + * + * @group getParam + */ def getOutputCols: Array[String] = get(outputCols).getOrElse(getInputCols.map("finished_" + _)) val extractNearestNeighbor: BooleanParam = - new BooleanParam(this, "extractNearestNeighbor", doc = "Extract the best neighbors with distance") + new BooleanParam( + this, + "extractNearestNeighbor", + doc = "Extract the best neighbors with distance") /** Set flag to extract best neighbor with distance - * - * @group setParam - */ + * + * @group setParam + */ def setExtractNearestNeighbor(value: Boolean): this.type = set(extractNearestNeighbor, value) /** Name of input annotation cols containing embeddings - * - * @group getParam - */ + * + * @group getParam + */ def getExtractNearestNeighbor: 
Boolean = $(extractNearestNeighbor) - setDefault( - extractNearestNeighbor -> false - ) + setDefault(extractNearestNeighbor -> false) override def transform(dataset: Dataset[_]): DataFrame = { - require(getOutputCols.length == 1 || getOutputCols.length == 2, + require( + getOutputCols.length == 1 || getOutputCols.length == 2, "Output column array should have length 1 (default case) or 2 when value id and neighbors are assigned.") val (idColName, neighborsColName) = @@ -109,24 +116,32 @@ case class DocumentSimilarityRankerFinisher (override val uid: String) idColName, element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) .getItem(LSH_ID_COL_NAME) - .cast("int") - ) + .cast("int")) .withColumn( neighborsColName, element_at(col(s"${AnnotatorType.DOC_SIMILARITY_RANKINGS}.metadata"), 1) - .getItem(LSH_NEIGHBORS_COL_NAME) - ) + .getItem(LSH_NEIGHBORS_COL_NAME)) val formatted = transformed - .withColumn(s"no_squared_$neighborsColName", regexp_replace(col(neighborsColName), "[\\[\\]]", "")) - .withColumn(s"tuple_extract_$neighborsColName", regexp_extract(col(s"no_squared_$neighborsColName"), "\\((.*?)\\)", 0)) - .withColumn(s"no_rounded_$neighborsColName", regexp_replace(col(s"tuple_extract_$neighborsColName"), "[\\(\\)]", "")) + .withColumn( + s"no_squared_$neighborsColName", + regexp_replace(col(neighborsColName), "[\\[\\]]", "")) + .withColumn( + s"tuple_extract_$neighborsColName", + regexp_extract(col(s"no_squared_$neighborsColName"), "\\((.*?)\\)", 0)) + .withColumn( + s"no_rounded_$neighborsColName", + regexp_replace(col(s"tuple_extract_$neighborsColName"), "[\\(\\)]", "")) val result = - if(getExtractNearestNeighbor) + if (getExtractNearestNeighbor) formatted - .withColumn(s"split_$neighborsColName", split(col(s"no_rounded_$neighborsColName"), ",")) - .withColumn("nearest_neighbor_id", element_at(col(s"split_$neighborsColName"), 1).cast(IntegerType)) + .withColumn( + s"split_$neighborsColName", + split(col(s"no_rounded_$neighborsColName"), ",")) + 
.withColumn( + "nearest_neighbor_id", + element_at(col(s"split_$neighborsColName"), 1).cast(IntegerType)) .withColumn("nearest_neighbor_distance", element_at(col(s"split_$neighborsColName"), 2)) else formatted @@ -147,9 +162,9 @@ case class DocumentSimilarityRankerFinisher (override val uid: String) FinisherUtil.checkIfInputColsExist(getInputCols, schema) FinisherUtil.checkIfAnnotationColumnIsSparkNLPAnnotation(schema, annotationColumn) - /** Check if the annotationColumn has DocumentSimilarityRanker. It must be - * annotators: DocumentSimilarityRanker - */ + /** Check if the annotationColumn has DocumentSimilarityRanker. It must be annotators: + * DocumentSimilarityRanker + */ require( documentSimilarityRankerAnnotators.contains( schema(annotationColumn).metadata.getString("annotatorType")), diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 784a2cde3077ee..f6885cbc9714a7 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -20,17 +20,18 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { "DocumentSimilarityRanker" should "should rank document similarity" taggedAs SlowTest in { - val smallCorpus = spark.createDataFrame( - List( - "First document, this is my first sentence. This is my second sentence.", - "Second document, this is my second sentence. 
This is my second sentence.", - "Third document, climate change is arguably one of the most pressing problems of our time.", - "Fourth document, climate change is definitely one of the most pressing problems of our time.", - "Fifth document, Florence in Italy, is among the most beautiful cities in Europe.", - "Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.", - "Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.", - "Eighth document, the warmest place in France is the French Riviera coast in Southern France.", - ).map(Tuple1(_))) + val smallCorpus = spark + .createDataFrame( + List( + "First document, this is my first sentence. This is my second sentence.", + "Second document, this is my second sentence. This is my second sentence.", + "Third document, climate change is arguably one of the most pressing problems of our time.", + "Fourth document, climate change is definitely one of the most pressing problems of our time.", + "Fifth document, Florence in Italy, is among the most beautiful cities in Europe.", + "Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.", + "Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.", + "Eighth document, the warmest place in France is the French Riviera coast in Southern France.") + .map(Tuple1(_))) .toDF("text") val documentAssembler = new DocumentAssembler() @@ -86,14 +87,14 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { embeddingsSentence, sentenceFinisher, docSimilarityRanker, - documentSimilarityFinisher - )) + documentSimilarityFinisher)) val transformed = pipeline.fit(smallCorpus).transform(smallCorpus) transformed.printSchema transformed - .select("text", + .select( + "text", "finished_sentence_embeddings", "finished_doc_similarity_rankings_id", "nearest_neighbor_id", @@ -103,4 +104,4 @@ class 
DocumentSimilarityRankerTestSpec extends AnyFlatSpec { // correct if not empty as inclusive query points are at distance 0.0 from themselves assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) } -} \ No newline at end of file +} From 625c6432a28e8fbcea245c01cc6f7e5e72378655 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Mon, 27 Mar 2023 22:25:15 +0200 Subject: [PATCH 19/26] WIP - fixed umbalanced embeddings Py test --- .../similarity/doc_similarity_ranker_test.py | 109 ++++++++++++++---- .../DocumentSimilarityRankerApproach.scala | 19 ++- .../DocumentSimilarityRankerTestSpec.scala | 4 +- 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/python/test/annotator/similarity/doc_similarity_ranker_test.py b/python/test/annotator/similarity/doc_similarity_ranker_test.py index 742ff7e71f2b6f..7db6956af03461 100644 --- a/python/test/annotator/similarity/doc_similarity_ranker_test.py +++ b/python/test/annotator/similarity/doc_similarity_ranker_test.py @@ -14,18 +14,35 @@ import unittest import pytest +from pyspark.sql import SparkSession from sparknlp.annotator import * from sparknlp.annotator.similarity.document_similarity_ranker import DocumentSimilarityRankerApproach from sparknlp.base import * -from test.util import SparkSessionForTest + + +# from test.util import SparkSessionForTest @pytest.mark.fast class DocumentSimilarityRankerTestSpec(unittest.TestCase): def setUp(self): - self.data = SparkSessionForTest.spark.createDataFrame([ + jars_path = "/Users/stefanolori/workspace/dev/oth/spark-nlp/python/sparknlp/lib/sparknlp.jar" + spark = SparkSession.builder \ + .master("local[*]") \ + .config("spark.jars", jars_path) \ + .config("spark.driver.memory", "12G") \ + .config("spark.driver.maxResultSize", "2G") \ + .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \ + .config("spark.kryoserializer.buffer.max", "500m") \ + .getOrCreate() + + spark.sparkContext.setLogLevel("WARN") + + # FIXME 
rollback the setting up from utility class for test + # self.data = SparkSessionForTest.spark.createDataFrame([ + self.data = spark.createDataFrame([ ["First document, this is my first sentence. This is my second sentence."], ["Second document, this is my second sentence. This is my second sentence."], ["Third document, climate change is arguably one of the most pressing problems of our time."], @@ -46,14 +63,13 @@ def runTest(self): tokenizer = Tokenizer() \ .setInputCols(["sentence"]) \ .setOutputCol("token") - glove = WordEmbeddingsModel.pretrained() \ - .setInputCols(["sentence", "token"]) \ - .setOutputCol("embeddings") - sentence_embeddings = SentenceEmbeddings() \ - .setInputCols(["sentence", "embeddings"]) \ - .setOutputCol("sentence_embeddings") \ - .setPoolingStrategy("AVERAGE") + sentence_embeddings = RoBertaSentenceEmbeddings.pretrained() \ + .setInputCols(["document"]) \ + .setOutputCol("sentence_embeddings") + + # TODO add document_similarity_ranker minmax LSH + # TODO add document_similarity_ranker with input col embeddings too document_similarity_ranker = DocumentSimilarityRankerApproach() \ .setInputCols("sentence_embeddings") \ .setOutputCol("doc_similarity_rankings") \ @@ -64,29 +80,72 @@ def runTest(self): .setVisibleDistances(True) \ .setIdentityRanking(False) - print(document_similarity_ranker.__dict__) - - # documentSimilarityFinisher = ( - # DocumentSimilarityRankerFinisher() - # .setInputCols("doc_similarity_rankings") - # .setOutputCols( - # "finished_doc_similarity_rankings_id", - # "finished_doc_similarity_rankings_neighbors") - # .setExtractNearestNeighbor(True) - # ) - pipeline = Pipeline(stages=[ document_assembler, sentence_detector, tokenizer, - glove, sentence_embeddings, document_similarity_ranker - # documentSimilarityFinisher + # TODO add document_similarity_ranker_finisher ]) model = pipeline.fit(self.data) - model.write().overwrite().save("./tmp_model") - loaded_model = model.load("./tmp_model") - 
loaded_model.transform(self.data).show() + # TODO add write/read pipeline + model.transform(self.data).show() + # FIXME encoding on GloVe generates different embeddings length + # def runTest(self): + # document_assembler = DocumentAssembler() \ + # .setInputCol("text") \ + # .setOutputCol("document") + # sentence_detector = SentenceDetector() \ + # .setInputCols(["document"]) \ + # .setOutputCol("sentence") + # tokenizer = Tokenizer() \ + # .setInputCols(["sentence"]) \ + # .setOutputCol("token") + # + # glove = WordEmbeddingsModel.pretrained() \ + # .setInputCols(["sentence", "token"]) \ + # .setOutputCol("embeddings") + # + # sentence_embeddings = SentenceEmbeddings() \ + # .setInputCols(["sentence", "embeddings"]) \ + # .setOutputCol("sentence_embeddings") \ + # .setPoolingStrategy("AVERAGE") + # + # document_similarity_ranker = DocumentSimilarityRankerApproach() \ + # .setInputCols("sentence_embeddings") \ + # .setOutputCol("doc_similarity_rankings") \ + # .setSimilarityMethod("brp") \ + # .setNumberOfNeighbours(10) \ + # .setBucketLength(2.0) \ + # .setNumHashTables(3) \ + # .setVisibleDistances(True) \ + # .setIdentityRanking(True) + # + # print(document_similarity_ranker.__dict__) + # + # # documentSimilarityFinisher = ( + # # DocumentSimilarityRankerFinisher() + # # .setInputCols("doc_similarity_rankings") + # # .setOutputCols( + # # "finished_doc_similarity_rankings_id", + # # "finished_doc_similarity_rankings_neighbors") + # # .setExtractNearestNeighbor(True) + # # ) + # + # pipeline = Pipeline(stages=[ + # document_assembler, + # sentence_detector, + # tokenizer, + # glove, + # sentence_embeddings, + # document_similarity_ranker + # ]) + # + # model = pipeline.fit(self.data) + # # model.write().overwrite().save("./tmp_model") + # # loaded_model = model.load("./tmp_model") + # # loaded_model.transform(self.data).show() + # model.transform(self.data).show() diff --git 
a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 803c9b2c72fc13..76e2c861a09fcc 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -6,7 +6,7 @@ import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel} import org.apache.spark.ml.functions.array_to_vector -import org.apache.spark.ml.linalg.DenseVector +import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.{BooleanParam, Param} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.functions.{col, flatten, udf} @@ -131,8 +131,9 @@ class DocumentSimilarityRankerApproach(override val uid: String) def getNeighborsResultSet( model: BucketedRandomProjectionLSHModel, - query: (Int, DenseVector), + query: (Int, Vector), similarityDataset: DataFrame): NeighborsResultSet = { + query match { case (index, queryVector) => val _similarityDataset = @@ -141,6 +142,11 @@ class DocumentSimilarityRankerApproach(override val uid: String) } else { similarityDataset.where(col("index") =!= index) } + + // FIXME remove it + // _similarityDataset.select(col(INDEX_COL_NAME), col(LSH_INPUT_COL_NAME)).show(false) + // println(s"Searching query:\n" + queryVector) + val similarRankedDocs = model.approxNearestNeighbors(_similarityDataset, queryVector, getNumberOfNeighbours) @@ -159,6 +165,7 @@ class DocumentSimilarityRankerApproach(override val uid: String) NeighborsResultSet(index, IndexedNeighbors(rankedNeighbours)) } + case _ => throw new IllegalArgumentException("query is not of type (Int, DenseVector)") } } @@ -189,15 +196,17 @@ class 
DocumentSimilarityRankerApproach(override val uid: String) MurmurHash3.stringHash(s, MurmurHash3.stringSeed) } - val similarityDatasetWithIndex = - similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col(TEXT))) + val similarityDatasetWithIndex = similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col(TEXT))) val indexedVectorTuples = similarityDatasetWithIndex .select(INDEX_COL_NAME, LSH_INPUT_COL_NAME) .rdd - .map(x => (x.getAs[Int](INDEX_COL_NAME), x.getAs[DenseVector](LSH_INPUT_COL_NAME))) + .map(x => (x.getAs[Int](INDEX_COL_NAME), x.getAs[Vector](LSH_INPUT_COL_NAME))) .collect() + // FIXME remove it + // println(indexedVectorTuples.mkString("\n")) + val similarityMappings: Map[Int, NeighborAnnotation] = indexedVectorTuples .map(query => getNeighborsResultSet(model, query, similarityDatasetWithIndex)) .map(_.result) diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index f6885cbc9714a7..7f38c5ff4b9be5 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -91,11 +91,11 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val transformed = pipeline.fit(smallCorpus).transform(smallCorpus) - transformed.printSchema + // transformed.printSchema transformed .select( "text", - "finished_sentence_embeddings", + //"finished_sentence_embeddings", "finished_doc_similarity_rankings_id", "nearest_neighbor_id", "nearest_neighbor_distance") From eef709ea7a3e938c7e954eb85233cf40cc3717c6 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Tue, 28 Mar 2023 22:35:10 +0200 Subject: [PATCH 20/26] Added MinHash engine to doc sim ranker --- .../similarity/doc_similarity_ranker_test.py | 42 ++++++++- .../DocumentSimilarityRankerApproach.scala | 54 +++++------ .../DocumentSimilarityRankerTestSpec.scala | 
91 ++++++++++++++++++- 3 files changed, 155 insertions(+), 32 deletions(-) diff --git a/python/test/annotator/similarity/doc_similarity_ranker_test.py b/python/test/annotator/similarity/doc_similarity_ranker_test.py index 7db6956af03461..6d98de2ef12981 100644 --- a/python/test/annotator/similarity/doc_similarity_ranker_test.py +++ b/python/test/annotator/similarity/doc_similarity_ranker_test.py @@ -15,12 +15,10 @@ import pytest from pyspark.sql import SparkSession - from sparknlp.annotator import * from sparknlp.annotator.similarity.document_similarity_ranker import DocumentSimilarityRankerApproach from sparknlp.base import * - # from test.util import SparkSessionForTest @@ -68,7 +66,6 @@ def runTest(self): .setInputCols(["document"]) \ .setOutputCol("sentence_embeddings") - # TODO add document_similarity_ranker minmax LSH # TODO add document_similarity_ranker with input col embeddings too document_similarity_ranker = DocumentSimilarityRankerApproach() \ .setInputCols("sentence_embeddings") \ @@ -93,6 +90,45 @@ def runTest(self): # TODO add write/read pipeline model.transform(self.data).show() + def runTest(self): + document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + sentence_detector = SentenceDetector() \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + tokenizer = Tokenizer() \ + .setInputCols(["sentence"]) \ + .setOutputCol("token") + + sentence_embeddings = RoBertaSentenceEmbeddings.pretrained() \ + .setInputCols(["document"]) \ + .setOutputCol("sentence_embeddings") + + # TODO add document_similarity_ranker with input col embeddings too + document_similarity_ranker = DocumentSimilarityRankerApproach() \ + .setInputCols("sentence_embeddings") \ + .setOutputCol("doc_similarity_rankings") \ + .setSimilarityMethod("mh") \ + .setNumberOfNeighbours(10) \ + .setNumHashTables(3) \ + .setVisibleDistances(True) \ + .setIdentityRanking(False) + + pipeline = Pipeline(stages=[ + document_assembler, + 
sentence_detector, + tokenizer, + sentence_embeddings, + document_similarity_ranker + # TODO add document_similarity_ranker_finisher + ]) + + model = pipeline.fit(self.data) + # TODO add write/read pipeline + transformed = model.transform(self.data) + transformed.show() + # FIXME encoding on GloVe generates different embeddings length # def runTest(self): # document_assembler = DocumentAssembler() \ diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 76e2c861a09fcc..721a0d7baf7fd7 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -4,7 +4,11 @@ import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMB import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel -import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel} +import org.apache.spark.ml.feature.{ + BucketedRandomProjectionLSH, + BucketedRandomProjectionLSHModel, + MinHashLSH +} import org.apache.spark.ml.functions.array_to_vector import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.{BooleanParam, Param} @@ -130,10 +134,27 @@ class DocumentSimilarityRankerApproach(override val uid: String) identityRanking -> false) def getNeighborsResultSet( - model: BucketedRandomProjectionLSHModel, query: (Int, Vector), similarityDataset: DataFrame): NeighborsResultSet = { + val lsh = $(similarityMethod) match { + case "brp" => + new BucketedRandomProjectionLSH() + .setBucketLength($(bucketLength)) + .setNumHashTables($(numHashTables)) + .setInputCol(LSH_INPUT_COL_NAME) + .setOutputCol(LSH_OUTPUT_COL_NAME) + case 
"mh" => + new MinHashLSH() + .setNumHashTables($(numHashTables)) + .setInputCol(LSH_INPUT_COL_NAME) + .setOutputCol(LSH_OUTPUT_COL_NAME) + case _ => + throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") + } + + val model = lsh.fit(similarityDataset) + query match { case (index, queryVector) => val _similarityDataset = @@ -143,10 +164,6 @@ class DocumentSimilarityRankerApproach(override val uid: String) similarityDataset.where(col("index") =!= index) } - // FIXME remove it - // _similarityDataset.select(col(INDEX_COL_NAME), col(LSH_INPUT_COL_NAME)).show(false) - // println(s"Searching query:\n" + queryVector) - val similarRankedDocs = model.approxNearestNeighbors(_similarityDataset, queryVector, getNumberOfNeighbours) @@ -173,30 +190,16 @@ class DocumentSimilarityRankerApproach(override val uid: String) dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocumentSimilarityRankerModel = { - val lsh = $(similarityMethod) match { - case "brp" => - new BucketedRandomProjectionLSH() - .setBucketLength($(bucketLength)) - .setNumHashTables($(numHashTables)) - .setInputCol(LSH_INPUT_COL_NAME) - .setOutputCol(LSH_OUTPUT_COL_NAME) - case _ => - throw new IllegalArgumentException(s"${$(similarityMethod)} is not a valid value.") - } - val embeddingsDataset = dataset.withColumn(LSH_INPUT_COL_NAME, col(INPUT_EMBEDDINGS)) val similarityDataset: DataFrame = embeddingsDataset .withColumn(s"$LSH_INPUT_COL_NAME", flatten(col(s"$LSH_INPUT_COL_NAME"))) .withColumn(s"$LSH_INPUT_COL_NAME", array_to_vector(col(s"$LSH_INPUT_COL_NAME"))) - val model = lsh.fit(similarityDataset) - - val mh3UDF = udf { (s: String) => - MurmurHash3.stringHash(s, MurmurHash3.stringSeed) - } + val mh3UDF = udf { (s: String) => MurmurHash3.stringHash(s, MurmurHash3.stringSeed) } - val similarityDatasetWithIndex = similarityDataset.withColumn(INDEX_COL_NAME, mh3UDF(col(TEXT))) + val similarityDatasetWithIndex = + similarityDataset.withColumn(INDEX_COL_NAME, 
mh3UDF(col(TEXT))) val indexedVectorTuples = similarityDatasetWithIndex .select(INDEX_COL_NAME, LSH_INPUT_COL_NAME) @@ -204,11 +207,8 @@ class DocumentSimilarityRankerApproach(override val uid: String) .map(x => (x.getAs[Int](INDEX_COL_NAME), x.getAs[Vector](LSH_INPUT_COL_NAME))) .collect() - // FIXME remove it - // println(indexedVectorTuples.mkString("\n")) - val similarityMappings: Map[Int, NeighborAnnotation] = indexedVectorTuples - .map(query => getNeighborsResultSet(model, query, similarityDatasetWithIndex)) + .map(query => getNeighborsResultSet(query, similarityDatasetWithIndex)) .map(_.result) .toMap diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 7f38c5ff4b9be5..aa09e99d31550a 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -18,7 +18,7 @@ import org.scalatest.flatspec.AnyFlatSpec class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val spark: SparkSession = ResourceHelper.spark - "DocumentSimilarityRanker" should "should rank document similarity" taggedAs SlowTest in { + "DocumentSimilarityRanker" should "should use brp to rank document similarity" taggedAs SlowTest in { val smallCorpus = spark .createDataFrame( @@ -95,7 +95,94 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { transformed .select( "text", - //"finished_sentence_embeddings", + // "finished_sentence_embeddings", + "finished_doc_similarity_rankings_id", + "nearest_neighbor_id", + "nearest_neighbor_distance") + .show(false) + + // correct if not empty as inclusive query points are at distance 0.0 from themselves + assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) + } + + "DocumentSimilarityRanker" should "should use min hash to rank document 
similarity" taggedAs SlowTest in { + + val smallCorpus = spark + .createDataFrame( + List( + "First document, this is my first sentence. This is my second sentence.", + "Second document, this is my second sentence. This is my second sentence.", + "Third document, climate change is arguably one of the most pressing problems of our time.", + "Fourth document, climate change is definitely one of the most pressing problems of our time.", + "Fifth document, Florence in Italy, is among the most beautiful cities in Europe.", + "Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.", + "Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.", + "Eighth document, the warmest place in France is the French Riviera coast in Southern France.") + .map(Tuple1(_))) + .toDF("text") + + val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + val sentence = new SentenceDetector() + .setInputCols("document") + .setOutputCol("sentence") + + val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + + val embeddings = AnnotatorBuilder + .getGLoveEmbeddings(smallCorpus) + .setInputCols("document", "token") + .setOutputCol("embeddings") + .setCaseSensitive(false) + + val embeddingsSentence = new SentenceEmbeddings() + .setInputCols(Array("document", "embeddings")) + .setOutputCol("sentence_embeddings") + .setPoolingStrategy("AVERAGE") + + val sentenceFinisher = new EmbeddingsFinisher() + .setInputCols("sentence_embeddings") + .setOutputCols("finished_sentence_embeddings") + .setCleanAnnotations(false) + + val docSimilarityRanker = new DocumentSimilarityRankerApproach() + .setInputCols("sentence_embeddings") + .setOutputCol(DOC_SIMILARITY_RANKINGS) + .setSimilarityMethod("mh") + .setNumberOfNeighbours(3) + .setVisibleDistances(true) + .setIdentityRanking(false) + + val documentSimilarityFinisher = new 
DocumentSimilarityRankerFinisher() + .setInputCols("doc_similarity_rankings") + .setOutputCols( + "finished_doc_similarity_rankings_id", + "finished_doc_similarity_rankings_neighbors") + .setExtractNearestNeighbor(true) + + val pipeline = new Pipeline() + .setStages( + Array( + documentAssembler, + sentence, + tokenizer, + embeddings, + embeddingsSentence, + sentenceFinisher, + docSimilarityRanker, + documentSimilarityFinisher)) + + val transformed = pipeline.fit(smallCorpus).transform(smallCorpus) + + // transformed.printSchema + transformed + .select( + "text", + // "finished_sentence_embeddings", "finished_doc_similarity_rankings_id", "nearest_neighbor_id", "nearest_neighbor_distance") From 5a38c8e260fac2aaf85dc4bfb7307c8520cda8b1 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Thu, 13 Apr 2023 20:27:09 +0200 Subject: [PATCH 21/26] Fixed serde for ranker map params --- .../DocumentSimilarityRankerApproach.scala | 22 +++++++++++-------- .../DocumentSimilarityRankerModel.scala | 12 ++++------ .../DocumentSimilarityRankerFinisher.scala | 6 +++-- .../DocumentSimilarityRankerTestSpec.scala | 18 ++++++++++++--- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 721a0d7baf7fd7..03db4def80ec98 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -4,15 +4,11 @@ import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMB import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel -import org.apache.spark.ml.feature.{ - BucketedRandomProjectionLSH, - 
BucketedRandomProjectionLSHModel, - MinHashLSH -} +import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel, MinHashLSH} import org.apache.spark.ml.functions.array_to_vector import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.{BooleanParam, Param} -import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} import org.apache.spark.sql.functions.{col, flatten, udf} import org.apache.spark.sql.{DataFrame, Dataset} @@ -22,16 +18,18 @@ sealed trait NeighborAnnotation { def neighbors: Array[_] } + case class IndexedNeighbors(neighbors: Array[Int]) extends NeighborAnnotation -case class IndexedNeighborsWithDistance(neighbors: Array[(Int, Double)]) - extends NeighborAnnotation + +case class IndexedNeighborsWithDistance(neighbors: Array[(Int, Double)]) extends NeighborAnnotation + case class NeighborsResultSet(result: (Int, NeighborAnnotation)) + class DocumentSimilarityRankerApproach(override val uid: String) extends AnnotatorApproach[DocumentSimilarityRankerModel] - with HasStorageRef with HasEnableCachingProperties { override val description: AnnotatorType = "LSH based document similarity annotator" @@ -216,3 +214,9 @@ class DocumentSimilarityRankerApproach(override val uid: String) .setSimilarityMappings(Map("similarityMappings" -> similarityMappings)) } } + + +/** This is the companion object of [[DocumentSimilarityRankerApproach]]. Please refer to that class for the + * documentation. 
+ */ +object DocumentSimilarityRankerApproach extends DefaultParamsReadable[DocumentSimilarityRankerApproach] \ No newline at end of file diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index d90d9faf177f6f..b22e6d0fa38a7e 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -3,12 +3,7 @@ package com.johnsnowlabs.nlp.annotators.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} import com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties import com.johnsnowlabs.nlp.serialization.MapFeature -import com.johnsnowlabs.nlp.{ - Annotation, - AnnotatorModel, - HasSimpleAnnotate, - ParamsAndFeaturesWritable -} +import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate, ParamsAndFeaturesReadable, ParamsAndFeaturesWritable} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} import org.apache.spark.sql.functions.col @@ -18,7 +13,6 @@ import scala.util.hashing.MurmurHash3 class DocumentSimilarityRankerModel(override val uid: String) extends AnnotatorModel[DocumentSimilarityRankerModel] with HasSimpleAnnotate[DocumentSimilarityRankerModel] - with HasStorageRef with HasEmbeddingsProperties with ParamsAndFeaturesWritable { @@ -72,4 +66,6 @@ class DocumentSimilarityRankerModel(override val uid: String) }) } -object DocumentSimilarityRanker extends DefaultParamsReadable[DocumentSimilarityRankerModel] +trait ReadableDocumentSimilarityRanker extends ParamsAndFeaturesReadable[DocumentSimilarityRankerModel] + +object DocumentSimilarityRankerModel extends ReadableDocumentSimilarityRanker diff --git 
a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala index a6eb5115a75a21..257a0c92cce43e 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -4,7 +4,7 @@ import com.johnsnowlabs.nlp.AnnotatorType import com.johnsnowlabs.nlp.util.FinisherUtil import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{BooleanParam, ParamMap, StringArrayParam} -import org.apache.spark.ml.util.{DefaultParamsWritable, Identifiable} +import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.sql.{DataFrame, Dataset} @@ -13,7 +13,7 @@ case class DocumentSimilarityRankerFinisher(override val uid: String) extends Transformer with DefaultParamsWritable { - def this() = this(Identifiable.randomUID("document_similarity_ranker_finisher")) + def this() = this(Identifiable.randomUID("DOCUMENT_SIMILARITY_RANKER_FINISHER")) val LSH_ID_COL_NAME = "lshId" @@ -176,3 +176,5 @@ case class DocumentSimilarityRankerFinisher(override val uid: String) StructType(outputFields) } } + +object DocumentSimilarityRankerFinisher extends DefaultParamsReadable[DocumentSimilarityRankerFinisher] \ No newline at end of file diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index aa09e99d31550a..cee7d07fc80c96 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -10,7 +10,7 @@ import 
com.johnsnowlabs.nlp.finisher.DocumentSimilarityRankerFinisher import com.johnsnowlabs.nlp.util.io.ResourceHelper import com.johnsnowlabs.nlp.{AnnotatorBuilder, EmbeddingsFinisher} import com.johnsnowlabs.tags.SlowTest -import org.apache.spark.ml.Pipeline +import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions.col import org.scalatest.flatspec.AnyFlatSpec @@ -89,7 +89,13 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { docSimilarityRanker, documentSimilarityFinisher)) - val transformed = pipeline.fit(smallCorpus).transform(smallCorpus) + val trainedPipelineModel = pipeline.fit(smallCorpus) + + val pipelineModelLoc = "./tmp_doc_sim_ranker_brp_pipeline" + trainedPipelineModel.write.overwrite().save(pipelineModelLoc) + val pipelineModel = PipelineModel.load(pipelineModelLoc) + + val transformed = pipelineModel.transform(smallCorpus) // transformed.printSchema transformed @@ -176,7 +182,13 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { docSimilarityRanker, documentSimilarityFinisher)) - val transformed = pipeline.fit(smallCorpus).transform(smallCorpus) + val trainedPipelineModel = pipeline.fit(smallCorpus) + + val pipelineModelLoc = "./tmp_doc_sim_ranker_mh_pipeline" + trainedPipelineModel.write.overwrite().save(pipelineModelLoc) + val pipelineModel = PipelineModel.load(pipelineModelLoc) + + val transformed = pipelineModel.transform(smallCorpus) // transformed.printSchema transformed From 72ebae717155987dcebadd84a2f1d0e8f3bc2cad Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Tue, 18 Apr 2023 22:21:04 +0200 Subject: [PATCH 22/26] Clean up pytests --- .../similarity/document_similarity_ranker.py | 4 ++-- .../similarity/doc_similarity_ranker_test.py | 15 ++---------- .../DocumentSimilarityRankerApproach.scala | 23 ++++++++++--------- .../DocumentSimilarityRankerModel.scala | 11 +++++++-- .../DocumentSimilarityRankerFinisher.scala | 3 ++- 5 files changed, 27 
insertions(+), 29 deletions(-) diff --git a/python/sparknlp/annotator/similarity/document_similarity_ranker.py b/python/sparknlp/annotator/similarity/document_similarity_ranker.py index 75fe114bff4ede..c2c5175b48c763 100644 --- a/python/sparknlp/annotator/similarity/document_similarity_ranker.py +++ b/python/sparknlp/annotator/similarity/document_similarity_ranker.py @@ -16,7 +16,7 @@ from sparknlp.common import * -class DocumentSimilarityRankerApproach(AnnotatorApproach, HasStorageRef, HasEnableCachingProperties): +class DocumentSimilarityRankerApproach(AnnotatorApproach, HasEnableCachingProperties): inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS] outputAnnotatorType = AnnotatorType.DOC_SIMILARITY_RANKINGS @@ -136,7 +136,7 @@ def _create_model(self, java_model): return DocumentSimilarityRankerModel(java_model=java_model) -class DocumentSimilarityRankerModel(AnnotatorModel, HasStorageRef, HasEmbeddingsProperties): +class DocumentSimilarityRankerModel(AnnotatorModel, HasEmbeddingsProperties): name = "DocumentSimilarityRankerModel" inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS] diff --git a/python/test/annotator/similarity/doc_similarity_ranker_test.py b/python/test/annotator/similarity/doc_similarity_ranker_test.py index 6d98de2ef12981..a16441b81994a9 100644 --- a/python/test/annotator/similarity/doc_similarity_ranker_test.py +++ b/python/test/annotator/similarity/doc_similarity_ranker_test.py @@ -22,21 +22,10 @@ # from test.util import SparkSessionForTest -@pytest.mark.fast +@pytest.mark.slow class DocumentSimilarityRankerTestSpec(unittest.TestCase): - def setUp(self): - jars_path = "/Users/stefanolori/workspace/dev/oth/spark-nlp/python/sparknlp/lib/sparknlp.jar" - spark = SparkSession.builder \ - .master("local[*]") \ - .config("spark.jars", jars_path) \ - .config("spark.driver.memory", "12G") \ - .config("spark.driver.maxResultSize", "2G") \ - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \ - 
.config("spark.kryoserializer.buffer.max", "500m") \ - .getOrCreate() - - spark.sparkContext.setLogLevel("WARN") + self.spark = SparkContextForTest.spark # FIXME rollback the setting up from utility class for test # self.data = SparkSessionForTest.spark.createDataFrame([ diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala index 03db4def80ec98..1282303c995815 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerApproach.scala @@ -4,7 +4,11 @@ import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMB import com.johnsnowlabs.nlp.{AnnotatorApproach, HasEnableCachingProperties} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.PipelineModel -import org.apache.spark.ml.feature.{BucketedRandomProjectionLSH, BucketedRandomProjectionLSHModel, MinHashLSH} +import org.apache.spark.ml.feature.{ + BucketedRandomProjectionLSH, + BucketedRandomProjectionLSHModel, + MinHashLSH +} import org.apache.spark.ml.functions.array_to_vector import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.{BooleanParam, Param} @@ -18,16 +22,13 @@ sealed trait NeighborAnnotation { def neighbors: Array[_] } - case class IndexedNeighbors(neighbors: Array[Int]) extends NeighborAnnotation - -case class IndexedNeighborsWithDistance(neighbors: Array[(Int, Double)]) extends NeighborAnnotation - +case class IndexedNeighborsWithDistance(neighbors: Array[(Int, Double)]) + extends NeighborAnnotation case class NeighborsResultSet(result: (Int, NeighborAnnotation)) - class DocumentSimilarityRankerApproach(override val uid: String) extends AnnotatorApproach[DocumentSimilarityRankerModel] with HasEnableCachingProperties { @@ -215,8 +216,8 @@ class 
DocumentSimilarityRankerApproach(override val uid: String) } } - -/** This is the companion object of [[DocumentSimilarityRankerApproach]]. Please refer to that class for the - * documentation. - */ -object DocumentSimilarityRankerApproach extends DefaultParamsReadable[DocumentSimilarityRankerApproach] \ No newline at end of file +/** This is the companion object of [[DocumentSimilarityRankerApproach]]. Please refer to that + * class for the documentation. + */ +object DocumentSimilarityRankerApproach + extends DefaultParamsReadable[DocumentSimilarityRankerApproach] diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala index b22e6d0fa38a7e..eb75d78c7df430 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/similarity/DocumentSimilarityRankerModel.scala @@ -3,7 +3,13 @@ package com.johnsnowlabs.nlp.annotators.similarity import com.johnsnowlabs.nlp.AnnotatorType.{DOC_SIMILARITY_RANKINGS, SENTENCE_EMBEDDINGS} import com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties import com.johnsnowlabs.nlp.serialization.MapFeature -import com.johnsnowlabs.nlp.{Annotation, AnnotatorModel, HasSimpleAnnotate, ParamsAndFeaturesReadable, ParamsAndFeaturesWritable} +import com.johnsnowlabs.nlp.{ + Annotation, + AnnotatorModel, + HasSimpleAnnotate, + ParamsAndFeaturesReadable, + ParamsAndFeaturesWritable +} import com.johnsnowlabs.storage.HasStorageRef import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} import org.apache.spark.sql.functions.col @@ -66,6 +72,7 @@ class DocumentSimilarityRankerModel(override val uid: String) }) } -trait ReadableDocumentSimilarityRanker extends ParamsAndFeaturesReadable[DocumentSimilarityRankerModel] +trait ReadableDocumentSimilarityRanker + extends 
ParamsAndFeaturesReadable[DocumentSimilarityRankerModel] object DocumentSimilarityRankerModel extends ReadableDocumentSimilarityRanker diff --git a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala index 257a0c92cce43e..3aeb7ccb9dd29b 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -177,4 +177,5 @@ case class DocumentSimilarityRankerFinisher(override val uid: String) } } -object DocumentSimilarityRankerFinisher extends DefaultParamsReadable[DocumentSimilarityRankerFinisher] \ No newline at end of file +object DocumentSimilarityRankerFinisher + extends DefaultParamsReadable[DocumentSimilarityRankerFinisher] From e6e9497caa323ba97c63a7e4b08bba5078069c7b Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Thu, 20 Apr 2023 23:37:00 +0200 Subject: [PATCH 23/26] Added doc sim ranker finisher Python interface --- .../similarity/document_similarity_ranker.py | 82 ++++++- python/sparknlp/lib/test_doc_sim_ranker.ipynb | 223 ++++++++++++++++++ .../similarity/doc_similarity_ranker_test.py | 23 +- .../DocumentSimilarityRankerFinisher.scala | 4 +- .../DocumentSimilarityRankerTestSpec.scala | 14 +- 5 files changed, 325 insertions(+), 21 deletions(-) create mode 100644 python/sparknlp/lib/test_doc_sim_ranker.ipynb diff --git a/python/sparknlp/annotator/similarity/document_similarity_ranker.py b/python/sparknlp/annotator/similarity/document_similarity_ranker.py index c2c5175b48c763..00ba0738be2936 100644 --- a/python/sparknlp/annotator/similarity/document_similarity_ranker.py +++ b/python/sparknlp/annotator/similarity/document_similarity_ranker.py @@ -14,6 +14,9 @@ """Contains classes for DocumentSimilarityRanker.""" from sparknlp.common import * +from pyspark import keyword_only +from pyspark.ml.param import TypeConverters, 
Params, Param +from sparknlp.internal import AnnotatorTransformer class DocumentSimilarityRankerApproach(AnnotatorApproach, HasEnableCachingProperties): @@ -150,5 +153,80 @@ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.similarity.Documen ) -class DocumentSimilarityRankerFinisher: - pass +class DocumentSimilarityRankerFinisher(AnnotatorTransformer): + + inputCols = Param(Params._dummy(), + "inputCols", + "name of input annotation cols containing document similarity ranker results", + typeConverter=TypeConverters.toListString) + outputCols = Param(Params._dummy(), + "outputCols", + "output DocumentSimilarityRankerFinisher output cols", + typeConverter=TypeConverters.toListString) + extractNearestNeighbor = Param(Params._dummy(), "extractNearestNeighbor", + "whether to extract the nearest neighbor document", + typeConverter=TypeConverters.toBoolean) + + name = "DocumentSimilarityRankerFinisher" + + @keyword_only + def __init__(self): + super(DocumentSimilarityRankerFinisher, self).__init__(classname="com.johnsnowlabs.nlp.finisher.DocumentSimilarityRankerFinisher") + self._setDefault( + extractNearestNeighbor=False + ) + + @keyword_only + def setParams(self): + kwargs = self._input_kwargs + return self._set(**kwargs) + + def setInputCols(self, *value): + """Sets name of input annotation columns containing embeddings. + + Parameters + ---------- + *value : str + Input columns for the annotator + """ + + if len(value) == 1 and type(value[0]) == list: + return self._set(inputCols=value[0]) + else: + return self._set(inputCols=list(value)) + + def setOutputCols(self, *value): + """Sets names of finished output columns. 
+ + Parameters + ---------- + *value : List[str] + Input columns for the annotator + """ + + if len(value) == 1 and type(value[0]) == list: + return self._set(outputCols=value[0]) + else: + return self._set(outputCols=list(value)) + + def setExtractNearestNeighbor(self, value): + """Sets whether to extract the nearest neighbor document, by default False. + + Parameters + ---------- + value : bool + Whether to extract the nearest neighbor document + """ + + return self._set(extractNearestNeighbor=value) + + def getInputCols(self): + """Gets input columns name of annotations.""" + return self.getOrDefault(self.inputCols) + + def getOutputCols(self): + """Gets output columns name of annotations.""" + if len(self.getOrDefault(self.outputCols)) == 0: + return ["finished_" + input_col for input_col in self.getInputCols()] + else: + return self.getOrDefault(self.outputCols) \ No newline at end of file diff --git a/python/sparknlp/lib/test_doc_sim_ranker.ipynb b/python/sparknlp/lib/test_doc_sim_ranker.ipynb new file mode 100644 index 00000000000000..4aa8694a0e5daa --- /dev/null +++ b/python/sparknlp/lib/test_doc_sim_ranker.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "1a9dd32e", + "metadata": {}, + "outputs": [], + "source": [ + "# Import Spark NLP\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline\n", + "import sparknlp\n", + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a3f563d5", + "metadata": {}, + "outputs": [], + "source": [ + "data = spark.createDataFrame([\n", + " [\"First document, this is my first sentence. This is my second sentence.\"],\n", + " [\"Second document, this is my second sentence. 
This is my second sentence.\"],\n", + " [\"Third document, climate change is arguably one of the most pressing problems of our time.\"],\n", + " [\"Fourth document, climate change is definitely one of the most pressing problems of our time.\"],\n", + " [\"Fifth document, Florence in Italy, is among the most beautiful cities in Europe.\"],\n", + " [\"Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.\"],\n", + " [\"Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.\"],\n", + " [\"Eighth document, the warmest place in France is the French Riviera coast in Southern France.\"]\n", + " ]).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "34604126", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 0:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+\n", + "| text|\n", + "+--------------------+\n", + "|First document, t...|\n", + "|Second document, ...|\n", + "|Third document, c...|\n", + "|Fourth document, ...|\n", + "|Fifth document, F...|\n", + "|Sixth document, F...|\n", + "|Seventh document,...|\n", + "|Eighth document, ...|\n", + "+--------------------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + } + ], + "source": [ + "data.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9a8f9eae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sent_roberta_base download started this may take some time.\n", + "Approximate size to download 284.8 MB\n", + "[ — ]sent_roberta_base download started this may take some time.\n", + "Approximate size to download 284.8 MB\n", + "Download done! 
Loading the resource.\n", + "[ — ]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-04-20 23:24:27.498674: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[OK!]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "23/04/20 23:24:36 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS\n", + "23/04/20 23:24:36 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.ForeignLinkerBLAS\n", + "+------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+-------------------------+\n", + "|text |document |sentence |token |sentence_embeddings |doc_similarity_rankings |finished_doc_similarity_rankings_id|finished_doc_similarity_rankings_neighbors |nearest_neighbor_id|nearest_neighbor_distance|\n", + "+------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+-------------------------+\n", + "|First document, this is my first sentence. This is my second sentence. |[{document, 0, 69, First document, this is my first sentence. This is my second sentence., {sentence -> 0}, []}] |[{document, 0, 41, First document, this is my first sentence., {sentence -> 0}, []}, {document, 43, 69, This is my second sentence., {sentence -> 1}, []}] |[{token, 0, 4, First, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 19, this, {sentence -> 0}, []}, {token, 21, 22, is, {sentence -> 0}, []}, {token, 24, 25, my, {sentence -> 0}, []}, {token, 27, 31, first, {sentence -> 0}, []}, {token, 33, 40, sentence, {sentence -> 0}, []}, {token, 41, 41, ., {sentence -> 0}, []}, {token, 43, 46, This, {sentence -> 1}, []}, {token, 48, 49, is, {sentence -> 1}, []}, {token, 51, 52, my, {sentence -> 1}, []}, {token, 54, 59, second, {sentence -> 1}, []}, {token, 61, 68, sentence, {sentence -> 1}, []}, {token, 69, 69, ., {sentence -> 1}, []}] |[{sentence_embeddings, 0, 69, First document, this is my first sentence. This is my second sentence., {sentence -> 0, token -> First document, this is my first sentence. 
This is my second sentence., pieceId -> -1, isWordStart -> true}, [-0.0016509573, -0.20525712, -0.21965097, -0.059896577, 0.1287623, 0.19543253, 0.2601918, -0.08692171, -0.105864875, -0.1537919, 0.23202443, 0.018591736, -0.09253034, 0.086080864, -0.13730444, 0.4803275, 0.22649625, -0.4538324, 0.060501292, -0.022498287, -0.2590919, 0.052696917, 0.459304, 0.34466252, 0.096251465, 0.052280027, -0.13888891, -0.019944986, 0.16517194, 0.23654841, 0.28898573, 0.06694212, 0.10338869, 0.25518826, -0.23867318, 0.06869716, -0.3045499, 0.008243265, 0.2485094, -0.18031433, -0.071846046, 0.15547389, 0.20157382, -0.15197659, -0.11142959, 0.40137476, 0.2453684, 0.016406162, -0.09601788, -0.09317206, -0.35614085, 0.32941064, 0.28732747, 0.192841, -0.010993258, 0.03665424, -0.15332705, 0.26033446, -0.09785265, -0.100575626, -0.11137454, -0.20248345, -0.011054744, -0.06887667, 0.010282027, -0.12600632, 0.09104587, -0.1361701, -0.16300207, 0.07745324, -0.07060441, 0.15660061, 0.17127551, -0.31203714, -0.28957927, 0.06316288, -0.5809974, -0.11879944, 0.29057372, 0.44116113, -0.12183485, 0.1956723, 0.05491798, 0.21726313, -0.014335553, -0.042902187, -0.051117186, -0.10438917, 0.17740841, 0.26991013, -0.20177917, -0.39176428, 0.056344096, 0.018784363, -0.08682689, 0.008575063, -0.02186684, -0.06476933, -0.15295796, -0.18002006, 0.07875256, -0.2424122, -0.14407246, 0.2685751, -0.0313854, -0.19538617, -0.015640117, 0.29632342, 0.07008212, -0.09939836, -0.16506532, 0.41788468, 0.29194131, 0.011487283, 0.015623666, 0.16049236, 0.12739733, -0.2970833, 0.42711484, -0.30154955, -0.0042597246, -0.09696268, 0.11135193, 0.13904221, -0.21608506, 0.29018146, 0.16236295, 0.25396034, 0.19324322, 0.08677476, -0.053531177, 0.14517988, -0.11484923, 0.15421726, 0.22026348, 0.11952507, 0.010373934, -0.34372324, -0.23538907, 0.2590132, 0.32607847, 0.18607004, -0.041907027, 0.17356849, 0.11717049, 0.23005699, 0.14421561, -0.40986398, 0.04959026, 0.32919577, 0.11714556, 0.15356015, -0.05093626, -0.27187335, 
-0.2434729, -0.0744803, 0.046752095, -0.31802502, -0.13378103, 0.37520665, 0.04794983, -0.015131961, -0.15734115, -0.2253003, -0.006646415, -0.10654589, 0.03465094, 0.08154701, -0.0929395, -0.3991926, -0.09223294, -0.55187756, -0.13733594, 0.20702904, -0.30563506, 0.24306192, -0.30793938, 0.10908097, 0.37263408, 0.051739924, 0.013253078, -0.18778512, -0.010503004, 0.13689041, 0.33355665, 0.22972348, -0.38842443, 0.09157828, 0.16181372, 0.25097308, 0.12582561, -0.040875636, -0.13416474, 0.14245158, -0.19798084, 0.16056503, -0.20634986, 0.17811936, -0.23963411, -0.20601338, 0.2909662, -0.41837695, -0.043886326, 0.10675116, 0.2580096, 0.012383441, -0.020623814, -0.08108415, 0.11714205, 0.18568753, 0.13704987, -0.38517642, 0.27483293, -0.027029455, -0.029421767, -0.03520134, 0.17171104, 0.25429082, 0.096434996, -0.40192246, -0.15646617, 0.108028576, 0.27889878, -0.22277395, 0.16855176, -0.3042606, -0.3762433, -0.1455701, 0.18953793, 0.21115941, 0.15629233, -0.26154017, 0.18894924, -0.09465889, -0.44113842, -0.36655298, -0.109620884, 0.2422129, 0.16998006, 0.1736859, 0.24283548, 0.023980502, 0.1371494, 0.15047146, 0.19360293, -0.13204347, 0.17984484, -0.36500728, -0.05806427, -0.26840413, -0.19872135, -0.23028538, 0.3935414, -0.19386484, 0.23332444, 0.39288434, -0.30275175, -0.10890826, 0.15745756, 0.10137965, 0.06952018, -0.13416727, 0.21371306, 0.17559311, -0.101447366, 0.23363695, -0.0016540436, 0.27593082, 0.19691755, 0.10001489, 0.14923371, 0.13464274, -0.15578793, 0.05758963, 0.018758772, -0.018384326, -0.24449226, -0.13423687, 0.22088864, -0.0455652, 0.027285956, -0.1665838, -0.107885145, 0.021460643, 0.3850578, -0.35932088, 0.26157242, 0.07409609, 0.14653832, -0.22538042, -0.20356905, 0.065705076, 0.18143511, -0.40029642, -0.014136726, 0.15440395, 0.10932906, 0.21850891, 0.2580729, -0.00988111, -0.120041765, 0.4927309, -0.14106606, -0.12887348, 0.278349, -0.26594374, -0.27160963, 0.2535723, -0.01851703, 0.30484486, 0.14516489, 0.030297654, 0.042642463, 
-0.6132387, 0.07002391, -0.4603895, -0.012375533, 0.022541162, -0.09806742, -0.18119614, 0.13785718, 0.2639673, -0.2530231, -0.02733121, 0.20391333, 0.07542794, -0.09949427, 0.48948997, -0.014352312, 0.21734369, -0.06898155, 0.2560105, -0.1990966, 0.27983844, -0.27486038, -0.113962464, 0.015387479, 0.0917569, 0.05042972, -0.05722061, -0.33860856, 0.20994869, 0.010669914, -0.042475466, -0.06582815, 0.09746496, -0.018288288, 0.05784519, 0.0482757, 0.32611609, 0.20912243, -0.018082418, -0.380203, -0.024771776, -0.10463602, 0.040596716, 0.027981333, -0.033508264, 0.43150952, -0.1027142, 0.014048678, -0.1371664, 0.26183385, 0.2007899, 0.1397189, 0.12691422, 0.092273235, 0.15318273, -0.036203247, -0.0019277359, -0.122955844, -0.23079583, -0.27070555, 0.21413256, -0.22849205, -0.15937866, 0.16445585, 0.2226356, -0.13215786, 0.16300718, 0.3076105, 0.10777309, -0.15043095, 0.25800404, -0.10401015, 0.11895908, 0.31831232, -0.027889991, 0.16663, 0.50905895, 0.20906426, -0.3731108, -0.015852178, -0.21458037, -0.008900974, 0.23457617, -0.13320251, 0.17811044, 0.38757527, 0.31200206, 0.45592153, -0.017489515, -0.10929343, 0.08955372, 0.22110991, 0.021531774, -0.1519175, -0.1773438, 0.2732921, 0.06679491, -0.14536235, -0.021941066, -0.14891659, 0.016744306, -0.12736456, -0.39618, 0.039470803, 0.20796895, -0.4877525, 0.1055665, -0.2754277, 0.04144014, -0.22956206, 0.20604385, -0.22717506, -0.10176536, 0.39214277, -0.08812965, 0.029923324, -0.14861423, -0.15354954, 0.01690382, 0.030926324, -0.04741114, -0.023319794, 0.3530178, -0.11661981, 0.026740639, 0.02915204, 0.20712481, -0.062735416, 0.16828942, 0.027333798, -0.143883, -0.38526252, 0.11417671, -0.20366596, -0.44497097, -0.3797921, 0.36764264, -0.15239948, -0.24930836, -0.22178406, -0.26763546, 0.059928652, 0.18547069, 0.46448106, -0.38581342, -0.0952114, 0.50129086, -0.07868489, -0.16569282, 0.28730562, 0.1784999, -0.3028391, 0.32795244, 0.26607093, -0.04681752, 0.028598929, 0.5256846, 0.11711999, 0.19183147, -0.21013169, 
0.44537762, -0.22686206, 0.31831375, -0.14511606, -0.19815892, -0.22267957, 0.014032256, 0.32726866, 0.20925169, -0.42334914, -0.11525112, 0.03519667, 0.35435578, -0.386533, -0.061628006, -0.00946854, -0.33106926, 0.1413482, 0.08982125, 0.21938783, -0.36983567, -0.013867053, 0.40048254, -0.28595185, 0.12054411, 0.31387663, 0.06425676, 0.36636186, -0.02155459, -0.0055017034, 0.027993213, -0.2304683, -0.014896044, 0.13527954, 0.57070816, 0.15139924, -0.38313928, 0.09329611, 0.24846943, -0.18317448, 0.3021817, -0.09956795, -0.031919435, 0.26749623, -0.03647182, 0.13850948, -0.08908831, -0.23334776, -0.3168528, 0.38110238, -0.19650906, -0.10419602, -0.16122277, -0.10340526, -0.16876405, 0.039111413, -0.3708464, 0.31809127, 0.14100179, -0.18282901, -0.07942389, -0.089590766, -0.14431885, -0.22517395, -0.25504246, 0.4375917, -0.15872298, -0.4546147, 0.26022837, 0.03248598, 0.35889503, 0.048524577, 0.088019766, -0.06272402, 0.15277462, 0.08405457, -0.12085052, 0.2635839, 0.06471835, -0.53696585, -0.12954685, -0.20841905, 0.08119028, 0.204219, -0.3485882, 0.03504887, 0.010403169, 0.14514661, 0.00751219, -0.098806985, -0.07772141, 0.40441778, 0.24756968, 0.3027828, 0.09314904, 0.23755553, -0.031587124, -0.33195436, 0.043194465, 0.08573535, -0.18050656, 0.4158775, -0.07615283, -0.3771687, -0.064762294, 0.40367717, 0.09764434, 0.02901462, -0.04842578, 0.21205749, 0.16108298, -0.14622071, 0.17626472, -0.029647602, -0.13114613, -0.098030314, 0.07949154, -0.21446016, 0.0422025, -0.15453711, -0.0126237925, -0.20044088, 0.0046409313, -0.19496118, 0.2493613, -0.3019857, 0.07960567, 0.0483416, 0.28941196, -0.34885955, -0.16683857, -0.076981924, 0.14302921, 0.262528, 0.34287122, 0.026721897, 0.025925094, -0.17189877, -0.24605455, 0.050659895, -0.21924202, 0.14178936, 0.08741318, 0.2537666, -0.31009108, -0.18676534, 0.2118603, -0.09943448, -0.14400531, 0.39799818, 0.2653751, 0.20913126, 0.012621317, 0.25341883, 0.026780201, -0.1865512, -0.12074198, -0.25787765, 0.073576815, 
-0.08996679, -0.059922904, -0.06613629, -0.11565135, -0.19995238, -0.15129463, 0.1596153, 0.14251925, 0.025685819, -0.05717514, -0.012424833, -0.28294817, 0.32567903, 0.025750922, 0.06216015, -0.05263431, 0.045462035, -0.13553426, 0.23230933, 0.21270712, 0.075220734, -0.18877773, -0.0791943, -0.28871432, -0.35735613, 0.063926004, 0.12319752, 0.108247474, -0.083363935, -0.24347112, -0.004402367, -0.15542541, 0.19257565, 0.022904381, -0.15720096, -0.08451907, -0.04471166, -0.024871575, 0.07304403, -0.19043835, -0.19255094, -0.110913895, -0.07317639, -0.0738659, 0.3432893, -0.05318059, 0.2751972, -0.15310696, -0.0040784623, -0.17644951, 0.10200317, -0.061957322, 0.05185596, 0.2646503, -0.44402647, -0.1641448, -0.030240616, -0.18341458, -0.14352442, -0.082672276, -0.024050146, 0.24113926, -0.36153135, 0.22421445, -0.13683006, 0.19264282, -0.07566274, -0.2577963, -0.14437243, -0.005780896, 0.2361684, -0.34697405, -0.2580383, -0.26091143, -0.09685133, -0.078017235, -0.26482865, 0.41297123, -0.107337065, -0.07583126, 0.007925678, 0.46675873, 0.21356691, 0.16632208, 0.22148071, -0.032334905, 0.03056415, 0.09642622, -0.45709848, 0.24851573, -0.2539892, -0.12904367, 0.016261106, 0.12153378, -0.0012138055, 0.025865352, -0.13303627, -0.09864319, 0.21006866, -0.35695034, -0.045082778, 0.25232777, 0.14546284, -0.24803042, 0.02825165, 0.12093924, 0.37656778, 0.08200147, -0.22424765, 0.10366332, -0.3352094, -0.017510755, -0.19172588, -0.30078256, 0.16272469, -0.06270328, 0.07839331, -0.098105066, -0.30520752, 0.2076339, -0.08514935, -0.06716773, 0.4287407, 0.02735758, -0.13251884, 0.14675677, 0.029392201, 0.032187577, -0.098731965, 0.28921035, 0.23158008, -0.28971958, 0.15396328, -0.14108004, -0.057366837, -0.07640423]}] |[{doc_similarity_rankings, 0, 69, First document, this is my first sentence. This is my second sentence., {pieceId -> -1, lshId -> 1510101612, isWordStart -> true, token -> First document, this is my first sentence. 
This is my second sentence., lshNeighbors -> [(1634839239,0.12448559273510636),(1274183715,0.36788497133113396),(-612640902,0.3851201869623605),(1293373212,0.3979194244143298),(-1320876223,0.3994126224276987),(-1719102856,0.4043799951515284),(-1548374770,0.41026101952006294)], sentence -> 0}, [-0.0016509573, -0.20525712, -0.21965097, -0.059896577, 0.1287623, 0.19543253, 0.2601918, -0.08692171, -0.105864875, -0.1537919, 0.23202443, 0.018591736, -0.09253034, 0.086080864, -0.13730444, 0.4803275, 0.22649625, -0.4538324, 0.060501292, -0.022498287, -0.2590919, 0.052696917, 0.459304, 0.34466252, 0.096251465, 0.052280027, -0.13888891, -0.019944986, 0.16517194, 0.23654841, 0.28898573, 0.06694212, 0.10338869, 0.25518826, -0.23867318, 0.06869716, -0.3045499, 0.008243265, 0.2485094, -0.18031433, -0.071846046, 0.15547389, 0.20157382, -0.15197659, -0.11142959, 0.40137476, 0.2453684, 0.016406162, -0.09601788, -0.09317206, -0.35614085, 0.32941064, 0.28732747, 0.192841, -0.010993258, 0.03665424, -0.15332705, 0.26033446, -0.09785265, -0.100575626, -0.11137454, -0.20248345, -0.011054744, -0.06887667, 0.010282027, -0.12600632, 0.09104587, -0.1361701, -0.16300207, 0.07745324, -0.07060441, 0.15660061, 0.17127551, -0.31203714, -0.28957927, 0.06316288, -0.5809974, -0.11879944, 0.29057372, 0.44116113, -0.12183485, 0.1956723, 0.05491798, 0.21726313, -0.014335553, -0.042902187, -0.051117186, -0.10438917, 0.17740841, 0.26991013, -0.20177917, -0.39176428, 0.056344096, 0.018784363, -0.08682689, 0.008575063, -0.02186684, -0.06476933, -0.15295796, -0.18002006, 0.07875256, -0.2424122, -0.14407246, 0.2685751, -0.0313854, -0.19538617, -0.015640117, 0.29632342, 0.07008212, -0.09939836, -0.16506532, 0.41788468, 0.29194131, 0.011487283, 0.015623666, 0.16049236, 0.12739733, -0.2970833, 0.42711484, -0.30154955, -0.0042597246, -0.09696268, 0.11135193, 0.13904221, -0.21608506, 0.29018146, 0.16236295, 0.25396034, 0.19324322, 0.08677476, -0.053531177, 0.14517988, -0.11484923, 0.15421726, 0.22026348, 
0.11952507, 0.010373934, -0.34372324, -0.23538907, 0.2590132, 0.32607847, 0.18607004, -0.041907027, 0.17356849, 0.11717049, 0.23005699, 0.14421561, -0.40986398, 0.04959026, 0.32919577, 0.11714556, 0.15356015, -0.05093626, -0.27187335, -0.2434729, -0.0744803, 0.046752095, -0.31802502, -0.13378103, 0.37520665, 0.04794983, -0.015131961, -0.15734115, -0.2253003, -0.006646415, -0.10654589, 0.03465094, 0.08154701, -0.0929395, -0.3991926, -0.09223294, -0.55187756, -0.13733594, 0.20702904, -0.30563506, 0.24306192, -0.30793938, 0.10908097, 0.37263408, 0.051739924, 0.013253078, -0.18778512, -0.010503004, 0.13689041, 0.33355665, 0.22972348, -0.38842443, 0.09157828, 0.16181372, 0.25097308, 0.12582561, -0.040875636, -0.13416474, 0.14245158, -0.19798084, 0.16056503, -0.20634986, 0.17811936, -0.23963411, -0.20601338, 0.2909662, -0.41837695, -0.043886326, 0.10675116, 0.2580096, 0.012383441, -0.020623814, -0.08108415, 0.11714205, 0.18568753, 0.13704987, -0.38517642, 0.27483293, -0.027029455, -0.029421767, -0.03520134, 0.17171104, 0.25429082, 0.096434996, -0.40192246, -0.15646617, 0.108028576, 0.27889878, -0.22277395, 0.16855176, -0.3042606, -0.3762433, -0.1455701, 0.18953793, 0.21115941, 0.15629233, -0.26154017, 0.18894924, -0.09465889, -0.44113842, -0.36655298, -0.109620884, 0.2422129, 0.16998006, 0.1736859, 0.24283548, 0.023980502, 0.1371494, 0.15047146, 0.19360293, -0.13204347, 0.17984484, -0.36500728, -0.05806427, -0.26840413, -0.19872135, -0.23028538, 0.3935414, -0.19386484, 0.23332444, 0.39288434, -0.30275175, -0.10890826, 0.15745756, 0.10137965, 0.06952018, -0.13416727, 0.21371306, 0.17559311, -0.101447366, 0.23363695, -0.0016540436, 0.27593082, 0.19691755, 0.10001489, 0.14923371, 0.13464274, -0.15578793, 0.05758963, 0.018758772, -0.018384326, -0.24449226, -0.13423687, 0.22088864, -0.0455652, 0.027285956, -0.1665838, -0.107885145, 0.021460643, 0.3850578, -0.35932088, 0.26157242, 0.07409609, 0.14653832, -0.22538042, -0.20356905, 0.065705076, 0.18143511, -0.40029642, 
-0.014136726, 0.15440395, 0.10932906, 0.21850891, 0.2580729, -0.00988111, -0.120041765, 0.4927309, -0.14106606, -0.12887348, 0.278349, -0.26594374, -0.27160963, 0.2535723, -0.01851703, 0.30484486, 0.14516489, 0.030297654, 0.042642463, -0.6132387, 0.07002391, -0.4603895, -0.012375533, 0.022541162, -0.09806742, -0.18119614, 0.13785718, 0.2639673, -0.2530231, -0.02733121, 0.20391333, 0.07542794, -0.09949427, 0.48948997, -0.014352312, 0.21734369, -0.06898155, 0.2560105, -0.1990966, 0.27983844, -0.27486038, -0.113962464, 0.015387479, 0.0917569, 0.05042972, -0.05722061, -0.33860856, 0.20994869, 0.010669914, -0.042475466, -0.06582815, 0.09746496, -0.018288288, 0.05784519, 0.0482757, 0.32611609, 0.20912243, -0.018082418, -0.380203, -0.024771776, -0.10463602, 0.040596716, 0.027981333, -0.033508264, 0.43150952, -0.1027142, 0.014048678, -0.1371664, 0.26183385, 0.2007899, 0.1397189, 0.12691422, 0.092273235, 0.15318273, -0.036203247, -0.0019277359, -0.122955844, -0.23079583, -0.27070555, 0.21413256, -0.22849205, -0.15937866, 0.16445585, 0.2226356, -0.13215786, 0.16300718, 0.3076105, 0.10777309, -0.15043095, 0.25800404, -0.10401015, 0.11895908, 0.31831232, -0.027889991, 0.16663, 0.50905895, 0.20906426, -0.3731108, -0.015852178, -0.21458037, -0.008900974, 0.23457617, -0.13320251, 0.17811044, 0.38757527, 0.31200206, 0.45592153, -0.017489515, -0.10929343, 0.08955372, 0.22110991, 0.021531774, -0.1519175, -0.1773438, 0.2732921, 0.06679491, -0.14536235, -0.021941066, -0.14891659, 0.016744306, -0.12736456, -0.39618, 0.039470803, 0.20796895, -0.4877525, 0.1055665, -0.2754277, 0.04144014, -0.22956206, 0.20604385, -0.22717506, -0.10176536, 0.39214277, -0.08812965, 0.029923324, -0.14861423, -0.15354954, 0.01690382, 0.030926324, -0.04741114, -0.023319794, 0.3530178, -0.11661981, 0.026740639, 0.02915204, 0.20712481, -0.062735416, 0.16828942, 0.027333798, -0.143883, -0.38526252, 0.11417671, -0.20366596, -0.44497097, -0.3797921, 0.36764264, -0.15239948, -0.24930836, -0.22178406, -0.26763546, 
0.059928652, 0.18547069, 0.46448106, -0.38581342, -0.0952114, 0.50129086, -0.07868489, -0.16569282, 0.28730562, 0.1784999, -0.3028391, 0.32795244, 0.26607093, -0.04681752, 0.028598929, 0.5256846, 0.11711999, 0.19183147, -0.21013169, 0.44537762, -0.22686206, 0.31831375, -0.14511606, -0.19815892, -0.22267957, 0.014032256, 0.32726866, 0.20925169, -0.42334914, -0.11525112, 0.03519667, 0.35435578, -0.386533, -0.061628006, -0.00946854, -0.33106926, 0.1413482, 0.08982125, 0.21938783, -0.36983567, -0.013867053, 0.40048254, -0.28595185, 0.12054411, 0.31387663, 0.06425676, 0.36636186, -0.02155459, -0.0055017034, 0.027993213, -0.2304683, -0.014896044, 0.13527954, 0.57070816, 0.15139924, -0.38313928, 0.09329611, 0.24846943, -0.18317448, 0.3021817, -0.09956795, -0.031919435, 0.26749623, -0.03647182, 0.13850948, -0.08908831, -0.23334776, -0.3168528, 0.38110238, -0.19650906, -0.10419602, -0.16122277, -0.10340526, -0.16876405, 0.039111413, -0.3708464, 0.31809127, 0.14100179, -0.18282901, -0.07942389, -0.089590766, -0.14431885, -0.22517395, -0.25504246, 0.4375917, -0.15872298, -0.4546147, 0.26022837, 0.03248598, 0.35889503, 0.048524577, 0.088019766, -0.06272402, 0.15277462, 0.08405457, -0.12085052, 0.2635839, 0.06471835, -0.53696585, -0.12954685, -0.20841905, 0.08119028, 0.204219, -0.3485882, 0.03504887, 0.010403169, 0.14514661, 0.00751219, -0.098806985, -0.07772141, 0.40441778, 0.24756968, 0.3027828, 0.09314904, 0.23755553, -0.031587124, -0.33195436, 0.043194465, 0.08573535, -0.18050656, 0.4158775, -0.07615283, -0.3771687, -0.064762294, 0.40367717, 0.09764434, 0.02901462, -0.04842578, 0.21205749, 0.16108298, -0.14622071, 0.17626472, -0.029647602, -0.13114613, -0.098030314, 0.07949154, -0.21446016, 0.0422025, -0.15453711, -0.0126237925, -0.20044088, 0.0046409313, -0.19496118, 0.2493613, -0.3019857, 0.07960567, 0.0483416, 0.28941196, -0.34885955, -0.16683857, -0.076981924, 0.14302921, 0.262528, 0.34287122, 0.026721897, 0.025925094, -0.17189877, -0.24605455, 0.050659895, -0.21924202, 
0.14178936, 0.08741318, 0.2537666, -0.31009108, -0.18676534, 0.2118603, -0.09943448, -0.14400531, 0.39799818, 0.2653751, 0.20913126, 0.012621317, 0.25341883, 0.026780201, -0.1865512, -0.12074198, -0.25787765, 0.073576815, -0.08996679, -0.059922904, -0.06613629, -0.11565135, -0.19995238, -0.15129463, 0.1596153, 0.14251925, 0.025685819, -0.05717514, -0.012424833, -0.28294817, 0.32567903, 0.025750922, 0.06216015, -0.05263431, 0.045462035, -0.13553426, 0.23230933, 0.21270712, 0.075220734, -0.18877773, -0.0791943, -0.28871432, -0.35735613, 0.063926004, 0.12319752, 0.108247474, -0.083363935, -0.24347112, -0.004402367, -0.15542541, 0.19257565, 0.022904381, -0.15720096, -0.08451907, -0.04471166, -0.024871575, 0.07304403, -0.19043835, -0.19255094, -0.110913895, -0.07317639, -0.0738659, 0.3432893, -0.05318059, 0.2751972, -0.15310696, -0.0040784623, -0.17644951, 0.10200317, -0.061957322, 0.05185596, 0.2646503, -0.44402647, -0.1641448, -0.030240616, -0.18341458, -0.14352442, -0.082672276, -0.024050146, 0.24113926, -0.36153135, 0.22421445, -0.13683006, 0.19264282, -0.07566274, -0.2577963, -0.14437243, -0.005780896, 0.2361684, -0.34697405, -0.2580383, -0.26091143, -0.09685133, -0.078017235, -0.26482865, 0.41297123, -0.107337065, -0.07583126, 0.007925678, 0.46675873, 0.21356691, 0.16632208, 0.22148071, -0.032334905, 0.03056415, 0.09642622, -0.45709848, 0.24851573, -0.2539892, -0.12904367, 0.016261106, 0.12153378, -0.0012138055, 0.025865352, -0.13303627, -0.09864319, 0.21006866, -0.35695034, -0.045082778, 0.25232777, 0.14546284, -0.24803042, 0.02825165, 0.12093924, 0.37656778, 0.08200147, -0.22424765, 0.10366332, -0.3352094, -0.017510755, -0.19172588, -0.30078256, 0.16272469, -0.06270328, 0.07839331, -0.098105066, -0.30520752, 0.2076339, -0.08514935, -0.06716773, 0.4287407, 0.02735758, -0.13251884, 0.14675677, 0.029392201, 0.032187577, -0.098731965, 0.28921035, 0.23158008, -0.28971958, 0.15396328, -0.14108004, -0.057366837, -0.07640423]}] |1510101612 
|[(1634839239,0.12448559273510636),(1274183715,0.36788497133113396),(-612640902,0.3851201869623605),(1293373212,0.3979194244143298),(-1320876223,0.3994126224276987),(-1719102856,0.4043799951515284),(-1548374770,0.41026101952006294)] |1634839239 |0.12448559273510636 |\n", + "|Second document, this is my second sentence. This is my second sentence. |[{document, 0, 71, Second document, this is my second sentence. This is my second sentence., {sentence -> 0}, []}] |[{document, 0, 43, Second document, this is my second sentence., {sentence -> 0}, []}, {document, 45, 71, This is my second sentence., {sentence -> 1}, []}]|[{token, 0, 5, Second, {sentence -> 0}, []}, {token, 7, 14, document, {sentence -> 0}, []}, {token, 15, 15, ,, {sentence -> 0}, []}, {token, 17, 20, this, {sentence -> 0}, []}, {token, 22, 23, is, {sentence -> 0}, []}, {token, 25, 26, my, {sentence -> 0}, []}, {token, 28, 33, second, {sentence -> 0}, []}, {token, 35, 42, sentence, {sentence -> 0}, []}, {token, 43, 43, ., {sentence -> 0}, []}, {token, 45, 48, This, {sentence -> 1}, []}, {token, 50, 51, is, {sentence -> 1}, []}, {token, 53, 54, my, {sentence -> 1}, []}, {token, 56, 61, second, {sentence -> 1}, []}, {token, 63, 70, sentence, {sentence -> 1}, []}, {token, 71, 71, ., {sentence -> 1}, []}] |[{sentence_embeddings, 0, 71, Second document, this is my second sentence. This is my second sentence., {sentence -> 0, token -> Second document, this is my second sentence. 
This is my second sentence., pieceId -> -1, isWordStart -> true}, [-7.9203E-4, -0.19994189, -0.21818015, -0.068899736, 0.12664562, 0.1954791, 0.25884947, -0.08906762, -0.096521795, -0.15146676, 0.23279426, 0.02231225, -0.091652475, 0.088964544, -0.13240255, 0.48075354, 0.22785556, -0.45156693, 0.062804036, -0.020177238, -0.25701693, 0.055076845, 0.4630905, 0.34240133, 0.09377383, 0.054915123, -0.14161351, -0.017130367, 0.1733012, 0.23704202, 0.28406826, 0.06854082, 0.10759672, 0.258621, -0.23658031, 0.06974518, -0.30307853, 0.010062803, 0.24419631, -0.17857389, -0.061605684, 0.15594025, 0.20267735, -0.14955261, -0.11479992, 0.39396986, 0.23352055, 0.012666446, -0.0962206, -0.09166719, -0.35843217, 0.33310696, 0.28614143, 0.19793509, -0.014299779, 0.03125637, -0.15131183, 0.26195115, -0.092621945, -0.09774493, -0.10871576, -0.20030053, -0.014465115, -0.058903776, 0.010456275, -0.12946285, 0.090752184, -0.13612632, -0.15917492, 0.073066816, -0.07082851, 0.15889694, 0.16751619, -0.31353, -0.28896278, 0.061326317, -0.5799537, -0.11939915, 0.28790224, 0.44152555, -0.119611256, 0.19590247, 0.051425744, 0.21189548, -0.01531638, -0.046113886, -0.056209974, -0.10596114, 0.18074782, 0.27465618, -0.20207298, -0.38995308, 0.055122897, 0.023203688, -0.0892057, 0.0093817655, -0.014075832, -0.06670877, -0.15410645, -0.1759522, 0.083973736, -0.24712972, -0.13847028, 0.26776898, -0.03092194, -0.20474298, -0.010832185, 0.28615507, 0.06926395, -0.09348502, -0.15698338, 0.41989443, 0.3010278, 0.005741507, 0.010479024, 0.16931498, 0.12767749, -0.29227796, 0.42611268, -0.30473405, -0.0071336213, -0.100375555, 0.09961725, 0.14721175, -0.21532084, 0.28975573, 0.15805171, 0.2587544, 0.18892317, 0.08308744, -0.053409446, 0.14420575, -0.117936045, 0.16155364, 0.21884927, 0.11623547, 0.016398214, -0.34012944, -0.23610827, 0.25944826, 0.3265321, 0.17884685, -0.029824225, 0.17586859, 0.11585, 0.22395201, 0.1441974, -0.40962303, 0.055336952, 0.33760974, 0.11725796, 0.1549155, -0.057688136, 
-0.27527377, -0.23793808, -0.06886171, 0.04727421, -0.32366428, -0.124764286, 0.37829912, 0.044418834, -0.014433529, -0.15863699, -0.23130025, -0.014072199, -0.116705395, 0.026241766, 0.082573965, -0.089801684, -0.39718676, -0.09980003, -0.5494743, -0.1430713, 0.21158108, -0.30317372, 0.24022597, -0.3023537, 0.111940525, 0.37496758, 0.05516233, 0.011221796, -0.18284394, -0.0047331, 0.13796207, 0.32586053, 0.23857506, -0.38834578, 0.09571896, 0.15859152, 0.2515127, 0.12696691, -0.037285045, -0.13651451, 0.13980338, -0.198333, 0.15924208, -0.20204458, 0.18000285, -0.24857216, -0.20852609, 0.29131892, -0.41542646, -0.04368751, 0.10883073, 0.26118433, 0.013824453, -0.027603198, -0.08514061, 0.117200404, 0.18326314, 0.14687406, -0.3891525, 0.27375975, -0.02553157, -0.033559944, -0.038643405, 0.1725868, 0.24817786, 0.10062808, -0.3984106, -0.15110281, 0.107347734, 0.2811384, -0.22248136, 0.17181505, -0.30180675, -0.37590006, -0.13890691, 0.1929018, 0.21564123, 0.14888352, -0.2589628, 0.18979052, -0.09733246, -0.44260895, -0.3626373, -0.10656619, 0.24600953, 0.17717755, 0.17176694, 0.24860108, 0.02450866, 0.13089818, 0.14760958, 0.1889736, -0.13462782, 0.17552358, -0.36474925, -0.054144062, -0.2719437, -0.20575465, -0.2259518, 0.390171, -0.1989032, 0.23816091, 0.39332652, -0.30287528, -0.11111271, 0.15751657, 0.10828888, 0.061609615, -0.13818201, 0.21809433, 0.17633602, -0.10041227, 0.22838311, -0.0037272298, 0.27824283, 0.19050625, 0.09929577, 0.14321278, 0.12866658, -0.1560034, 0.054702457, 0.0034277993, -0.023457147, -0.24974422, -0.12811284, 0.2138116, -0.043564834, 0.024615834, -0.17061573, -0.105848975, 0.01979917, 0.3876299, -0.3644679, 0.25748017, 0.0794696, 0.15106939, -0.2271831, -0.2029017, 0.06485374, 0.18893777, -0.40528575, -0.0067824377, 0.15022416, 0.11026635, 0.21823986, 0.25255138, -0.009357705, -0.11272444, 0.48919556, -0.14951092, -0.12166809, 0.28113428, -0.26702005, -0.2696553, 0.25774094, -0.020699237, 0.30593178, 0.13539925, 0.023027057, 
0.04737817, -0.60946774, 0.07452417, -0.4618816, -0.012704029, 0.017143121, -0.0928661, -0.18195343, 0.13803084, 0.26418334, -0.25597656, -0.028610228, 0.20601481, 0.06913206, -0.105633095, 0.48713952, -0.017569678, 0.21389663, -0.06817315, 0.26016077, -0.2034709, 0.27557772, -0.27667582, -0.11112172, 0.017718645, 0.084379435, 0.043280818, -0.059255335, -0.3404926, 0.22076394, 0.0044043344, -0.037489407, -0.06305672, 0.09715581, -0.023487065, 0.062111195, 0.05335917, 0.32398656, 0.2121781, -0.015352369, -0.37581238, -0.02546437, -0.1003463, 0.049476393, 0.01883333, -0.03315766, 0.43547428, -0.10425473, 0.011709515, -0.13826483, 0.26741698, 0.20992693, 0.14296642, 0.12905589, 0.091058806, 0.15014836, -0.033118833, -0.0050203684, -0.1263514, -0.2340833, -0.27371702, 0.21375184, -0.22322327, -0.15550363, 0.16027404, 0.22693431, -0.12760872, 0.16786651, 0.30304092, 0.1020746, -0.15578815, 0.25656548, -0.101949446, 0.127458, 0.31914127, -0.029818093, 0.17126171, 0.5050371, 0.2140625, -0.36993846, -0.019031882, -0.21702774, -0.0075844345, 0.23762833, -0.13128382, 0.17342623, 0.3832189, 0.30292338, 0.45272982, -0.014391475, -0.10896908, 0.092533424, 0.22071956, 0.023630338, -0.15142313, -0.16476907, 0.2736263, 0.06355073, -0.14254087, -0.017445322, -0.15101986, 0.018103518, -0.13215697, -0.39054778, 0.031270053, 0.20609067, -0.4899748, 0.10231394, -0.28053063, 0.042639177, -0.2338356, 0.21435437, -0.23163229, -0.10278779, 0.38594186, -0.08828131, 0.033451065, -0.15233083, -0.14713119, 0.015353501, 0.024095697, -0.04088602, -0.021885296, 0.35037458, -0.12262792, 0.029172843, 0.031767074, 0.20502482, -0.059423614, 0.17006303, 0.03032189, -0.14278243, -0.38293302, 0.12350028, -0.20444815, -0.446956, -0.38407224, 0.36807615, -0.14949287, -0.25289682, -0.22304098, -0.26412737, 0.054438926, 0.1866949, 0.46163344, -0.3870432, -0.09577562, 0.4925763, -0.07591358, -0.17125858, 0.29294312, 0.18506376, -0.3011424, 0.33213896, 0.27247593, -0.04371708, 0.022509042, 0.5281235, 
0.11490675, 0.18309964, -0.2098084, 0.44932392, -0.2238262, 0.32604268, -0.15083954, -0.19439663, -0.21758024, 0.016958022, 0.31868294, 0.20522523, -0.4294383, -0.1155165, 0.038857397, 0.35014954, -0.38389844, -0.06027076, -0.0022453207, -0.33463535, 0.14471063, 0.08705408, 0.21836443, -0.37401456, -0.018341504, 0.39743432, -0.28624484, 0.11763633, 0.31765255, 0.06626895, 0.36575744, -0.022255607, -0.010258075, 0.034847856, -0.23807333, -0.009544487, 0.13759036, 0.5633902, 0.15055975, -0.3893473, 0.099084534, 0.2487808, -0.18436259, 0.2964102, -0.0974038, -0.030039463, 0.26750058, -0.0327042, 0.13959204, -0.09072471, -0.23449129, -0.31150666, 0.37843606, -0.20243177, -0.10671544, -0.15774731, -0.100252956, -0.17092793, 0.043813135, -0.3745126, 0.324973, 0.13483994, -0.17662391, -0.082839765, -0.095041975, -0.13983752, -0.2234264, -0.25730914, 0.43138906, -0.15586619, -0.45626152, 0.25078535, 0.032389496, 0.3605528, 0.043308545, 0.082841, -0.05712051, 0.15781389, 0.09113666, -0.12152442, 0.26758134, 0.07198326, -0.5393054, -0.12838638, -0.20900357, 0.08293295, 0.20305818, -0.34222803, 0.030090628, 0.011068957, 0.15267001, 0.0166323, -0.09774065, -0.07362094, 0.4034169, 0.24240533, 0.29080132, 0.093652435, 0.22641939, -0.027256148, -0.33472311, 0.0421845, 0.084676325, -0.18856609, 0.41979697, -0.0742718, -0.37898067, -0.06616097, 0.40372992, 0.097068146, 0.028025001, -0.053321853, 0.21186478, 0.1617424, -0.14168897, 0.17353843, -0.026624104, -0.13292201, -0.10193648, 0.082835816, -0.2110324, 0.047005363, -0.15010522, -0.011524656, -0.21158908, 0.013927639, -0.20468919, 0.2549774, -0.30430713, 0.08803344, 0.04733768, 0.29572278, -0.34726125, -0.16165416, -0.07808548, 0.13735756, 0.26037022, 0.34519985, 0.02743408, 0.017144704, -0.16387558, -0.24629596, 0.05140357, -0.2149917, 0.14342026, 0.09417347, 0.2471141, -0.3102873, -0.1811054, 0.21190523, -0.0939989, -0.14163078, 0.39620474, 0.26059932, 0.2017156, 0.016578814, 0.2596664, 0.023013765, -0.18858111, -0.12537004, 
-0.2575313, 0.07671513, -0.084907316, -0.05217875, -0.0572651, -0.11264922, -0.20340498, -0.15451567, 0.15938708, 0.13023944, 0.024635538, -0.04755041, -0.013276761, -0.27871516, 0.32686794, 0.025345188, 0.056448072, -0.05157638, 0.055301744, -0.14052545, 0.23356776, 0.21044475, 0.074066274, -0.19347996, -0.075556666, -0.2903639, -0.35939184, 0.06149839, 0.122687556, 0.10089751, -0.076755464, -0.23811671, -0.00608524, -0.15459716, 0.19323912, 0.024601229, -0.16377638, -0.085954376, -0.042363558, -0.026964856, 0.068481274, -0.19715573, -0.19233052, -0.1171513, -0.07371926, -0.06850815, 0.3406645, -0.045532167, 0.27094424, -0.14512636, -0.004277191, -0.17940599, 0.10672061, -0.0615622, 0.05335321, 0.26168355, -0.4460772, -0.16097912, -0.03129851, -0.18674865, -0.14036278, -0.08535438, -0.030956995, 0.2338251, -0.3615369, 0.21861416, -0.12684713, 0.18677162, -0.07666991, -0.25236502, -0.14463082, -0.011356218, 0.24449459, -0.3400799, -0.2528164, -0.2593893, -0.09919247, -0.06947468, -0.2653912, 0.41220212, -0.105682805, -0.06773623, 0.0067983535, 0.4691811, 0.21326056, 0.16177358, 0.22778879, -0.032021854, 0.030037731, 0.09594971, -0.45707417, 0.25093716, -0.24953341, -0.13460502, 0.012793443, 0.123196214, -0.007967554, 0.021584822, -0.13040859, -0.09390109, 0.21663658, -0.35579094, -0.04199463, 0.26041034, 0.14653488, -0.24590142, 0.025850672, 0.12220801, 0.38460156, 0.07916422, -0.2231287, 0.10919217, -0.3318051, -0.01628104, -0.19389302, -0.30398846, 0.16407341, -0.07596026, 0.077111095, -0.100624435, -0.30112073, 0.20480298, -0.079184264, -0.07005212, 0.42865524, 0.028614324, -0.12899904, 0.15557994, 0.021030005, 0.036799897, -0.099277705, 0.2806157, 0.23383352, -0.28756225, 0.15074758, -0.13403846, -0.05876428, -0.07657761]}] |[{doc_similarity_rankings, 0, 71, Second document, this is my second sentence. This is my second sentence., {pieceId -> -1, lshId -> 1634839239, isWordStart -> true, token -> Second document, this is my second sentence. 
This is my second sentence., lshNeighbors -> [(1510101612,0.12448559273510636),(1274183715,0.3554576544360366),(-612640902,0.37472233818858686),(-1548374770,0.39013800843493296),(-1719102856,0.3901714913624425),(1293373212,0.39846872824443047),(-1320876223,0.3992484826857293)], sentence -> 0}, [-7.9203E-4, -0.19994189, -0.21818015, -0.068899736, 0.12664562, 0.1954791, 0.25884947, -0.08906762, -0.096521795, -0.15146676, 0.23279426, 0.02231225, -0.091652475, 0.088964544, -0.13240255, 0.48075354, 0.22785556, -0.45156693, 0.062804036, -0.020177238, -0.25701693, 0.055076845, 0.4630905, 0.34240133, 0.09377383, 0.054915123, -0.14161351, -0.017130367, 0.1733012, 0.23704202, 0.28406826, 0.06854082, 0.10759672, 0.258621, -0.23658031, 0.06974518, -0.30307853, 0.010062803, 0.24419631, -0.17857389, -0.061605684, 0.15594025, 0.20267735, -0.14955261, -0.11479992, 0.39396986, 0.23352055, 0.012666446, -0.0962206, -0.09166719, -0.35843217, 0.33310696, 0.28614143, 0.19793509, -0.014299779, 0.03125637, -0.15131183, 0.26195115, -0.092621945, -0.09774493, -0.10871576, -0.20030053, -0.014465115, -0.058903776, 0.010456275, -0.12946285, 0.090752184, -0.13612632, -0.15917492, 0.073066816, -0.07082851, 0.15889694, 0.16751619, -0.31353, -0.28896278, 0.061326317, -0.5799537, -0.11939915, 0.28790224, 0.44152555, -0.119611256, 0.19590247, 0.051425744, 0.21189548, -0.01531638, -0.046113886, -0.056209974, -0.10596114, 0.18074782, 0.27465618, -0.20207298, -0.38995308, 0.055122897, 0.023203688, -0.0892057, 0.0093817655, -0.014075832, -0.06670877, -0.15410645, -0.1759522, 0.083973736, -0.24712972, -0.13847028, 0.26776898, -0.03092194, -0.20474298, -0.010832185, 0.28615507, 0.06926395, -0.09348502, -0.15698338, 0.41989443, 0.3010278, 0.005741507, 0.010479024, 0.16931498, 0.12767749, -0.29227796, 0.42611268, -0.30473405, -0.0071336213, -0.100375555, 0.09961725, 0.14721175, -0.21532084, 0.28975573, 0.15805171, 0.2587544, 0.18892317, 0.08308744, -0.053409446, 0.14420575, -0.117936045, 0.16155364, 
0.21884927, 0.11623547, 0.016398214, -0.34012944, -0.23610827, 0.25944826, 0.3265321, 0.17884685, -0.029824225, 0.17586859, 0.11585, 0.22395201, 0.1441974, -0.40962303, 0.055336952, 0.33760974, 0.11725796, 0.1549155, -0.057688136, -0.27527377, -0.23793808, -0.06886171, 0.04727421, -0.32366428, -0.124764286, 0.37829912, 0.044418834, -0.014433529, -0.15863699, -0.23130025, -0.014072199, -0.116705395, 0.026241766, 0.082573965, -0.089801684, -0.39718676, -0.09980003, -0.5494743, -0.1430713, 0.21158108, -0.30317372, 0.24022597, -0.3023537, 0.111940525, 0.37496758, 0.05516233, 0.011221796, -0.18284394, -0.0047331, 0.13796207, 0.32586053, 0.23857506, -0.38834578, 0.09571896, 0.15859152, 0.2515127, 0.12696691, -0.037285045, -0.13651451, 0.13980338, -0.198333, 0.15924208, -0.20204458, 0.18000285, -0.24857216, -0.20852609, 0.29131892, -0.41542646, -0.04368751, 0.10883073, 0.26118433, 0.013824453, -0.027603198, -0.08514061, 0.117200404, 0.18326314, 0.14687406, -0.3891525, 0.27375975, -0.02553157, -0.033559944, -0.038643405, 0.1725868, 0.24817786, 0.10062808, -0.3984106, -0.15110281, 0.107347734, 0.2811384, -0.22248136, 0.17181505, -0.30180675, -0.37590006, -0.13890691, 0.1929018, 0.21564123, 0.14888352, -0.2589628, 0.18979052, -0.09733246, -0.44260895, -0.3626373, -0.10656619, 0.24600953, 0.17717755, 0.17176694, 0.24860108, 0.02450866, 0.13089818, 0.14760958, 0.1889736, -0.13462782, 0.17552358, -0.36474925, -0.054144062, -0.2719437, -0.20575465, -0.2259518, 0.390171, -0.1989032, 0.23816091, 0.39332652, -0.30287528, -0.11111271, 0.15751657, 0.10828888, 0.061609615, -0.13818201, 0.21809433, 0.17633602, -0.10041227, 0.22838311, -0.0037272298, 0.27824283, 0.19050625, 0.09929577, 0.14321278, 0.12866658, -0.1560034, 0.054702457, 0.0034277993, -0.023457147, -0.24974422, -0.12811284, 0.2138116, -0.043564834, 0.024615834, -0.17061573, -0.105848975, 0.01979917, 0.3876299, -0.3644679, 0.25748017, 0.0794696, 0.15106939, -0.2271831, -0.2029017, 0.06485374, 0.18893777, -0.40528575, 
-0.0067824377, 0.15022416, 0.11026635, 0.21823986, 0.25255138, -0.009357705, -0.11272444, 0.48919556, -0.14951092, -0.12166809, 0.28113428, -0.26702005, -0.2696553, 0.25774094, -0.020699237, 0.30593178, 0.13539925, 0.023027057, 0.04737817, -0.60946774, 0.07452417, -0.4618816, -0.012704029, 0.017143121, -0.0928661, -0.18195343, 0.13803084, 0.26418334, -0.25597656, -0.028610228, 0.20601481, 0.06913206, -0.105633095, 0.48713952, -0.017569678, 0.21389663, -0.06817315, 0.26016077, -0.2034709, 0.27557772, -0.27667582, -0.11112172, 0.017718645, 0.084379435, 0.043280818, -0.059255335, -0.3404926, 0.22076394, 0.0044043344, -0.037489407, -0.06305672, 0.09715581, -0.023487065, 0.062111195, 0.05335917, 0.32398656, 0.2121781, -0.015352369, -0.37581238, -0.02546437, -0.1003463, 0.049476393, 0.01883333, -0.03315766, 0.43547428, -0.10425473, 0.011709515, -0.13826483, 0.26741698, 0.20992693, 0.14296642, 0.12905589, 0.091058806, 0.15014836, -0.033118833, -0.0050203684, -0.1263514, -0.2340833, -0.27371702, 0.21375184, -0.22322327, -0.15550363, 0.16027404, 0.22693431, -0.12760872, 0.16786651, 0.30304092, 0.1020746, -0.15578815, 0.25656548, -0.101949446, 0.127458, 0.31914127, -0.029818093, 0.17126171, 0.5050371, 0.2140625, -0.36993846, -0.019031882, -0.21702774, -0.0075844345, 0.23762833, -0.13128382, 0.17342623, 0.3832189, 0.30292338, 0.45272982, -0.014391475, -0.10896908, 0.092533424, 0.22071956, 0.023630338, -0.15142313, -0.16476907, 0.2736263, 0.06355073, -0.14254087, -0.017445322, -0.15101986, 0.018103518, -0.13215697, -0.39054778, 0.031270053, 0.20609067, -0.4899748, 0.10231394, -0.28053063, 0.042639177, -0.2338356, 0.21435437, -0.23163229, -0.10278779, 0.38594186, -0.08828131, 0.033451065, -0.15233083, -0.14713119, 0.015353501, 0.024095697, -0.04088602, -0.021885296, 0.35037458, -0.12262792, 0.029172843, 0.031767074, 0.20502482, -0.059423614, 0.17006303, 0.03032189, -0.14278243, -0.38293302, 0.12350028, -0.20444815, -0.446956, -0.38407224, 0.36807615, -0.14949287, -0.25289682, 
-0.22304098, -0.26412737, 0.054438926, 0.1866949, 0.46163344, -0.3870432, -0.09577562, 0.4925763, -0.07591358, -0.17125858, 0.29294312, 0.18506376, -0.3011424, 0.33213896, 0.27247593, -0.04371708, 0.022509042, 0.5281235, 0.11490675, 0.18309964, -0.2098084, 0.44932392, -0.2238262, 0.32604268, -0.15083954, -0.19439663, -0.21758024, 0.016958022, 0.31868294, 0.20522523, -0.4294383, -0.1155165, 0.038857397, 0.35014954, -0.38389844, -0.06027076, -0.0022453207, -0.33463535, 0.14471063, 0.08705408, 0.21836443, -0.37401456, -0.018341504, 0.39743432, -0.28624484, 0.11763633, 0.31765255, 0.06626895, 0.36575744, -0.022255607, -0.010258075, 0.034847856, -0.23807333, -0.009544487, 0.13759036, 0.5633902, 0.15055975, -0.3893473, 0.099084534, 0.2487808, -0.18436259, 0.2964102, -0.0974038, -0.030039463, 0.26750058, -0.0327042, 0.13959204, -0.09072471, -0.23449129, -0.31150666, 0.37843606, -0.20243177, -0.10671544, -0.15774731, -0.100252956, -0.17092793, 0.043813135, -0.3745126, 0.324973, 0.13483994, -0.17662391, -0.082839765, -0.095041975, -0.13983752, -0.2234264, -0.25730914, 0.43138906, -0.15586619, -0.45626152, 0.25078535, 0.032389496, 0.3605528, 0.043308545, 0.082841, -0.05712051, 0.15781389, 0.09113666, -0.12152442, 0.26758134, 0.07198326, -0.5393054, -0.12838638, -0.20900357, 0.08293295, 0.20305818, -0.34222803, 0.030090628, 0.011068957, 0.15267001, 0.0166323, -0.09774065, -0.07362094, 0.4034169, 0.24240533, 0.29080132, 0.093652435, 0.22641939, -0.027256148, -0.33472311, 0.0421845, 0.084676325, -0.18856609, 0.41979697, -0.0742718, -0.37898067, -0.06616097, 0.40372992, 0.097068146, 0.028025001, -0.053321853, 0.21186478, 0.1617424, -0.14168897, 0.17353843, -0.026624104, -0.13292201, -0.10193648, 0.082835816, -0.2110324, 0.047005363, -0.15010522, -0.011524656, -0.21158908, 0.013927639, -0.20468919, 0.2549774, -0.30430713, 0.08803344, 0.04733768, 0.29572278, -0.34726125, -0.16165416, -0.07808548, 0.13735756, 0.26037022, 0.34519985, 0.02743408, 0.017144704, -0.16387558, 
-0.24629596, 0.05140357, -0.2149917, 0.14342026, 0.09417347, 0.2471141, -0.3102873, -0.1811054, 0.21190523, -0.0939989, -0.14163078, 0.39620474, 0.26059932, 0.2017156, 0.016578814, 0.2596664, 0.023013765, -0.18858111, -0.12537004, -0.2575313, 0.07671513, -0.084907316, -0.05217875, -0.0572651, -0.11264922, -0.20340498, -0.15451567, 0.15938708, 0.13023944, 0.024635538, -0.04755041, -0.013276761, -0.27871516, 0.32686794, 0.025345188, 0.056448072, -0.05157638, 0.055301744, -0.14052545, 0.23356776, 0.21044475, 0.074066274, -0.19347996, -0.075556666, -0.2903639, -0.35939184, 0.06149839, 0.122687556, 0.10089751, -0.076755464, -0.23811671, -0.00608524, -0.15459716, 0.19323912, 0.024601229, -0.16377638, -0.085954376, -0.042363558, -0.026964856, 0.068481274, -0.19715573, -0.19233052, -0.1171513, -0.07371926, -0.06850815, 0.3406645, -0.045532167, 0.27094424, -0.14512636, -0.004277191, -0.17940599, 0.10672061, -0.0615622, 0.05335321, 0.26168355, -0.4460772, -0.16097912, -0.03129851, -0.18674865, -0.14036278, -0.08535438, -0.030956995, 0.2338251, -0.3615369, 0.21861416, -0.12684713, 0.18677162, -0.07666991, -0.25236502, -0.14463082, -0.011356218, 0.24449459, -0.3400799, -0.2528164, -0.2593893, -0.09919247, -0.06947468, -0.2653912, 0.41220212, -0.105682805, -0.06773623, 0.0067983535, 0.4691811, 0.21326056, 0.16177358, 0.22778879, -0.032021854, 0.030037731, 0.09594971, -0.45707417, 0.25093716, -0.24953341, -0.13460502, 0.012793443, 0.123196214, -0.007967554, 0.021584822, -0.13040859, -0.09390109, 0.21663658, -0.35579094, -0.04199463, 0.26041034, 0.14653488, -0.24590142, 0.025850672, 0.12220801, 0.38460156, 0.07916422, -0.2231287, 0.10919217, -0.3318051, -0.01628104, -0.19389302, -0.30398846, 0.16407341, -0.07596026, 0.077111095, -0.100624435, -0.30112073, 0.20480298, -0.079184264, -0.07005212, 0.42865524, 0.028614324, -0.12899904, 0.15557994, 0.021030005, 0.036799897, -0.099277705, 0.2806157, 0.23383352, -0.28756225, 0.15074758, -0.13403846, -0.05876428, -0.07657761]}] 
|1634839239 |[(1510101612,0.12448559273510636),(1274183715,0.3554576544360366),(-612640902,0.37472233818858686),(-1548374770,0.39013800843493296),(-1719102856,0.3901714913624425),(1293373212,0.39846872824443047),(-1320876223,0.3992484826857293)]|1510101612 |0.12448559273510636 |\n", + "|Third document, climate change is arguably one of the most pressing problems of our time. |[{document, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {sentence -> 0}, []}] |[{document, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {sentence -> 0}, []}] |[{token, 0, 4, Third, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 22, climate, {sentence -> 0}, []}, {token, 24, 29, change, {sentence -> 0}, []}, {token, 31, 32, is, {sentence -> 0}, []}, {token, 34, 41, arguably, {sentence -> 0}, []}, {token, 43, 45, one, {sentence -> 0}, []}, {token, 47, 48, of, {sentence -> 0}, []}, {token, 50, 52, the, {sentence -> 0}, []}, {token, 54, 57, most, {sentence -> 0}, []}, {token, 59, 66, pressing, {sentence -> 0}, []}, {token, 68, 75, problems, {sentence -> 0}, []}, {token, 77, 78, of, {sentence -> 0}, []}, {token, 80, 82, our, {sentence -> 0}, []}, {token, 84, 87, time, {sentence -> 0}, []}, {token, 88, 88, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {sentence -> 0, token -> Third document, climate change is arguably one of the most pressing problems of our time., pieceId -> -1, isWordStart -> true}, [-0.008408386, -0.20978682, -0.21336395, -0.06701713, 0.14906062, 0.19436543, 0.2689835, -0.0736583, -0.085654296, -0.17641146, 0.23118123, 9.153709E-4, -0.095650904, 0.09745815, -0.12966505, 0.4991491, 0.21626908, -0.46584547, 0.036485918, -0.03004483, -0.2527194, 0.066779986, 0.4715149, 0.3309422, 0.10767734, 0.0772766, 
-0.13837156, -0.04646236, 0.18541306, 0.24775545, 0.29829848, 0.06701967, 0.10679874, 0.2371618, -0.2348681, 0.060263738, -0.30324578, 0.011110125, 0.25174752, -0.21265753, -0.07388571, 0.16692835, 0.21960892, -0.17161724, -0.10685435, 0.40878117, 0.22626911, 0.02965304, -0.12355306, -0.08647978, -0.36791545, 0.3402064, 0.29458448, 0.18367597, 0.018515693, 0.016963914, -0.16416565, 0.23452581, -0.08742108, -0.10209338, -0.11075058, -0.21584131, -0.0066427714, -0.057909314, 0.02990299, -0.1532408, 0.10471724, -0.15802106, -0.1316932, 0.0671004, -0.08458309, 0.14720063, 0.17743425, -0.29781777, -0.25875703, 0.052992765, -0.601354, -0.12203182, 0.28960162, 0.43318143, -0.11552292, 0.189184, 0.058978103, 0.22802205, -0.01887668, -0.06247763, -0.055835284, -0.10375289, 0.1687678, 0.28621984, -0.20247233, -0.39158803, 0.047121175, 0.03066416, -0.09131708, 0.0064128875, -0.008600525, -0.07319706, -0.17605102, -0.21015038, 0.09048563, -0.27109554, -0.14635244, 0.25777438, -0.03020101, -0.2038186, -0.015589433, 0.29672548, 0.078363374, -0.09256426, -0.17258734, 0.41875502, 0.32983577, -0.007111206, 0.028960332, 0.17240305, 0.11726792, -0.2967749, 0.42062518, -0.30623642, -0.018673455, -0.1085469, 0.13539384, 0.15505964, -0.2071793, 0.29197103, 0.13746233, 0.2660806, 0.19184366, 0.09343793, -0.03761699, 0.1458473, -0.118322745, 0.13873918, 0.20815493, 0.11604553, -0.0011643076, -0.35143498, -0.21321355, 0.2749992, 0.35669494, 0.1721764, -0.05118688, 0.19282715, 0.1083012, 0.20331307, 0.14773382, -0.400585, 0.04467102, 0.3526557, 0.12134734, 0.15779114, -0.07686861, -0.2722919, -0.25330427, -0.08266977, 0.051231362, -0.32711402, -0.13077538, 0.38633937, 0.029779907, 0.002141976, -0.15053126, -0.21427684, -0.03073989, -0.12185571, 0.030493725, 0.09356483, -0.08676565, -0.4158287, -0.11258328, -0.55825645, -0.12975422, 0.1916057, -0.3010512, 0.25087264, -0.28968638, 0.0923594, 0.39556426, 0.04983867, -0.0029593082, -0.19394685, -0.025053008, 0.1306449, 0.3228945, 0.23752072, 
-0.39854398, 0.09963036, 0.17405853, 0.260578, 0.13261874, -0.042916767, -0.13150778, 0.14103265, -0.18873711, 0.18550456, -0.21395086, 0.16902032, -0.2656128, -0.2184284, 0.28208458, -0.40157455, -0.031665407, 0.065816626, 0.27281633, 0.0017664516, -0.056637276, -0.08971444, 0.091889165, 0.18004844, 0.14723916, -0.40347266, 0.28464735, -0.029195225, -0.03000159, -0.024617586, 0.196627, 0.25832683, 0.10727102, -0.3850497, -0.14131209, 0.11651645, 0.28590408, -0.21434435, 0.18322834, -0.29039705, -0.3833519, -0.12992118, 0.20409176, 0.21254545, 0.17306952, -0.25796944, 0.18709883, -0.106259346, -0.43154985, -0.36494273, -0.11600778, 0.24581338, 0.18781164, 0.18708797, 0.25258115, 0.04334878, 0.11094503, 0.16887024, 0.18969853, -0.16395922, 0.17028388, -0.3653791, -0.062085465, -0.27042845, -0.20845734, -0.22975564, 0.38791245, -0.21153997, 0.22299886, 0.4067098, -0.3180967, -0.119020626, 0.16592985, 0.08134824, 0.07111609, -0.11281331, 0.21532886, 0.17626029, -0.11157626, 0.24869683, 0.002167793, 0.25803974, 0.17146061, 0.11219464, 0.14177255, 0.13628139, -0.14174575, 0.040052604, 0.0049264827, -0.027609568, -0.25137493, -0.14084421, 0.20105876, -0.058221187, 0.030821402, -0.15669239, -0.097033136, 0.02064872, 0.40998495, -0.3793549, 0.26364204, 0.09185891, 0.13761812, -0.23594798, -0.21576473, 0.064160846, 0.18441695, -0.39402333, 0.011562835, 0.15811875, 0.10871025, 0.2278601, 0.26277956, -0.011325682, -0.12331526, 0.4965102, -0.15407956, -0.13325658, 0.27157038, -0.25075117, -0.27684662, 0.28200155, -0.010683825, 0.30480832, 0.13320027, 0.031010821, 0.07333867, -0.6036074, 0.06996156, -0.4761334, -0.0033244402, 0.03359116, -0.097072996, -0.19554745, 0.15043412, 0.29090378, -0.24856627, -0.02541361, 0.2121456, 0.08295338, -0.09810014, 0.4901113, -0.022632308, 0.22354102, -0.060473014, 0.26167145, -0.21305074, 0.25782862, -0.26308486, -0.09908113, 0.007932383, 0.08765336, 0.052684747, -0.05878163, -0.31929326, 0.21060982, -0.006360022, -0.035593312, -0.04580954, 
0.092614815, -0.010202253, 0.053435646, 0.06810382, 0.33141363, 0.23865972, -0.022269802, -0.35638827, -0.017010234, -0.09842304, 0.044951804, 0.013101696, 0.012481737, 0.4473684, -0.07812174, 0.008848828, -0.10374172, 0.26849997, 0.19759308, 0.14334072, 0.12648976, 0.085015506, 0.16048867, -0.04428103, -0.02241106, -0.15643533, -0.2261526, -0.27816802, 0.21147077, -0.23083511, -0.15736212, 0.16844489, 0.21279429, -0.116731785, 0.13067369, 0.3122624, 0.136315, -0.16236319, 0.266095, -0.118092746, 0.08463656, 0.3190953, -0.025155172, 0.17036186, 0.517092, 0.21172866, -0.37978435, -0.033721708, -0.20605312, -0.010164018, 0.260588, -0.16144288, 0.1739176, 0.37816438, 0.31039205, 0.44743317, -0.006673559, -0.11675396, 0.07569114, 0.20170745, 0.012370929, -0.17593956, -0.16927823, 0.26412624, 0.05698451, -0.14810121, -0.023506388, -0.16487631, 0.026122056, -0.14697117, -0.4044809, 0.04526173, 0.21642078, -0.4945617, 0.10950964, -0.29737392, 0.06892894, -0.2156567, 0.22239617, -0.23322712, -0.10665675, 0.39332575, -0.09363847, 0.050910737, -0.19002058, -0.1574706, 0.02213912, 0.036516193, -0.028725138, -0.017847586, 0.3557758, -0.12832177, 0.048198838, 4.3433104E-4, 0.22483422, -0.061355013, 0.18990348, 0.03564027, -0.1394448, -0.39040518, 0.15794353, -0.20287846, -0.4367065, -0.37070453, 0.35814124, -0.14648235, -0.26215193, -0.2346811, -0.27164423, 0.06335842, 0.18181078, 0.4646271, -0.39600596, -0.079446204, 0.49517226, -0.04852778, -0.16844611, 0.2967518, 0.19760415, -0.31121883, 0.34088603, 0.25469545, -0.018662168, 0.02773218, 0.51845956, 0.13568775, 0.18219014, -0.23326635, 0.46436208, -0.21790144, 0.31559125, -0.17255287, -0.20793658, -0.20586798, -0.001172778, 0.3308969, 0.17670253, -0.42809558, -0.10740692, 0.036743138, 0.35039994, -0.37736034, -0.06748311, 0.0051405686, -0.35943413, 0.1323299, 0.076594, 0.23542015, -0.38947168, -0.0032625643, 0.40785658, -0.3197368, 0.13108508, 0.31222928, 0.10504441, 0.376466, -0.039943226, 0.0075853164, 0.062115036, 
-0.23973337, -0.015673233, 0.124596596, 0.567843, 0.13057941, -0.40122274, 0.10246062, 0.25044444, -0.19272071, 0.3008771, -0.085480474, -0.04598082, 0.27369395, -0.024367249, 0.16280788, -0.09723148, -0.2331273, -0.31018296, 0.3681214, -0.20254564, -0.099116005, -0.16499473, -0.111495964, -0.13236894, 0.04578884, -0.36830792, 0.33352196, 0.118905395, -0.20436546, -0.08910314, -0.07557359, -0.15557393, -0.234037, -0.2726709, 0.43529606, -0.1588916, -0.44991276, 0.2639855, 0.03949102, 0.3693944, 0.016801286, 0.10393568, -0.069996394, 0.15449597, 0.08037712, -0.13697414, 0.28319156, 0.066741705, -0.5410128, -0.14622404, -0.2038448, 0.08338539, 0.2011268, -0.33919087, 0.04361721, 0.02101723, 0.14619805, 0.030959083, -0.09712685, -0.07434212, 0.403413, 0.22790873, 0.28365055, 0.09378035, 0.23105437, -0.029275687, -0.3173693, 0.0543687, 0.07409911, -0.2017265, 0.45038718, -0.08160741, -0.39471105, -0.066249356, 0.41151386, 0.08760141, 0.007330824, -0.066278905, 0.21011162, 0.1539203, -0.12142708, 0.1757174, -0.026292767, -0.14986737, -0.113564014, 0.07391096, -0.21108653, 0.06545459, -0.16722327, -0.006594374, -0.20676054, 0.016034845, -0.20714003, 0.2670048, -0.31555256, 0.10177538, 0.06680942, 0.28953385, -0.34272602, -0.1623317, -0.076802626, 0.15219814, 0.26613286, 0.34149784, 0.032789387, 0.028899977, -0.18126866, -0.2511501, 0.081135035, -0.21000236, 0.16121005, 0.08403291, 0.24493581, -0.32109466, -0.20124927, 0.20610592, -0.10798646, -0.14663638, 0.40788773, 0.24154414, 0.21223918, 0.0129725, 0.26252237, 0.025581531, -0.17492732, -0.12604713, -0.24954422, 0.07021474, -0.078310095, -0.051400866, -0.035224427, -0.11225072, -0.1952861, -0.17365551, 0.14578226, 0.14051022, 0.022287847, -0.057631906, -0.0139911715, -0.27890974, 0.32389244, 0.02819324, 0.05819198, -0.069384664, 0.049535263, -0.13024889, 0.23608364, 0.21746805, 0.077531695, -0.19169594, -0.06775553, -0.27078623, -0.35225046, 0.058470953, 0.12146965, 0.13006094, -0.06770603, -0.25335953, 0.007816828, 
-0.13747253, 0.17829442, 0.011401831, -0.16540085, -0.103149, -0.069404215, -0.03443399, 0.077324726, -0.19909632, -0.20789692, -0.10723613, -0.08203292, -0.07713148, 0.34724632, -0.06089041, 0.2798024, -0.14503583, -0.0020078092, -0.15062527, 0.13015121, -0.065569915, 0.056575716, 0.25992274, -0.46167943, -0.15914112, -0.026965503, -0.20004211, -0.141356, -0.077509604, -0.027742084, 0.22565441, -0.36088252, 0.2413167, -0.12192323, 0.18409002, -0.08263175, -0.25700292, -0.16171978, -0.0079028085, 0.24911962, -0.3509631, -0.25993952, -0.2676477, -0.08899839, -0.06546339, -0.26874635, 0.42236093, -0.12012787, -0.08681204, 0.0034873153, 0.4458711, 0.20714682, 0.17381635, 0.20979515, -0.0273073, 0.03752274, 0.11206485, -0.46285358, 0.22496869, -0.2342749, -0.12822811, 0.0051306086, 0.109438226, -0.02305657, 0.030262345, -0.12668712, -0.11175425, 0.21646728, -0.37218267, -0.06411932, 0.27182007, 0.16698217, -0.26499093, 0.042440798, 0.13600044, 0.37879592, 0.11507201, -0.2173002, 0.1320668, -0.32382184, -0.037356067, -0.20496605, -0.2866237, 0.16485146, -0.081692345, 0.06725929, -0.10196793, -0.28608373, 0.21656291, -0.064518094, -0.07937422, 0.42675838, 0.0076278546, -0.12415507, 0.14044033, 0.014213737, 0.030048521, -0.0996337, 0.2774318, 0.20182906, -0.2818618, 0.1473123, -0.1531179, -0.04014678, -0.10299498]}] |[{doc_similarity_rankings, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {pieceId -> -1, lshId -> -612640902, isWordStart -> true, token -> Third document, climate change is arguably one of the most pressing problems of our time., lshNeighbors -> [(1274183715,0.12201215887654807),(-1719102856,0.2991777399965483),(-1548374770,0.31909423657258823),(-1320876223,0.32308714836804664),(1293373212,0.3656377678477694),(1634839239,0.37472233818858686),(1510101612,0.3851201869623605)], sentence -> 0}, [-0.008408386, -0.20978682, -0.21336395, -0.06701713, 0.14906062, 0.19436543, 0.2689835, -0.0736583, -0.085654296, 
-0.17641146, 0.23118123, 9.153709E-4, -0.095650904, 0.09745815, -0.12966505, 0.4991491, 0.21626908, -0.46584547, 0.036485918, -0.03004483, -0.2527194, 0.066779986, 0.4715149, 0.3309422, 0.10767734, 0.0772766, -0.13837156, -0.04646236, 0.18541306, 0.24775545, 0.29829848, 0.06701967, 0.10679874, 0.2371618, -0.2348681, 0.060263738, -0.30324578, 0.011110125, 0.25174752, -0.21265753, -0.07388571, 0.16692835, 0.21960892, -0.17161724, -0.10685435, 0.40878117, 0.22626911, 0.02965304, -0.12355306, -0.08647978, -0.36791545, 0.3402064, 0.29458448, 0.18367597, 0.018515693, 0.016963914, -0.16416565, 0.23452581, -0.08742108, -0.10209338, -0.11075058, -0.21584131, -0.0066427714, -0.057909314, 0.02990299, -0.1532408, 0.10471724, -0.15802106, -0.1316932, 0.0671004, -0.08458309, 0.14720063, 0.17743425, -0.29781777, -0.25875703, 0.052992765, -0.601354, -0.12203182, 0.28960162, 0.43318143, -0.11552292, 0.189184, 0.058978103, 0.22802205, -0.01887668, -0.06247763, -0.055835284, -0.10375289, 0.1687678, 0.28621984, -0.20247233, -0.39158803, 0.047121175, 0.03066416, -0.09131708, 0.0064128875, -0.008600525, -0.07319706, -0.17605102, -0.21015038, 0.09048563, -0.27109554, -0.14635244, 0.25777438, -0.03020101, -0.2038186, -0.015589433, 0.29672548, 0.078363374, -0.09256426, -0.17258734, 0.41875502, 0.32983577, -0.007111206, 0.028960332, 0.17240305, 0.11726792, -0.2967749, 0.42062518, -0.30623642, -0.018673455, -0.1085469, 0.13539384, 0.15505964, -0.2071793, 0.29197103, 0.13746233, 0.2660806, 0.19184366, 0.09343793, -0.03761699, 0.1458473, -0.118322745, 0.13873918, 0.20815493, 0.11604553, -0.0011643076, -0.35143498, -0.21321355, 0.2749992, 0.35669494, 0.1721764, -0.05118688, 0.19282715, 0.1083012, 0.20331307, 0.14773382, -0.400585, 0.04467102, 0.3526557, 0.12134734, 0.15779114, -0.07686861, -0.2722919, -0.25330427, -0.08266977, 0.051231362, -0.32711402, -0.13077538, 0.38633937, 0.029779907, 0.002141976, -0.15053126, -0.21427684, -0.03073989, -0.12185571, 0.030493725, 0.09356483, -0.08676565, 
-0.4158287, -0.11258328, -0.55825645, -0.12975422, 0.1916057, -0.3010512, 0.25087264, -0.28968638, 0.0923594, 0.39556426, 0.04983867, -0.0029593082, -0.19394685, -0.025053008, 0.1306449, 0.3228945, 0.23752072, -0.39854398, 0.09963036, 0.17405853, 0.260578, 0.13261874, -0.042916767, -0.13150778, 0.14103265, -0.18873711, 0.18550456, -0.21395086, 0.16902032, -0.2656128, -0.2184284, 0.28208458, -0.40157455, -0.031665407, 0.065816626, 0.27281633, 0.0017664516, -0.056637276, -0.08971444, 0.091889165, 0.18004844, 0.14723916, -0.40347266, 0.28464735, -0.029195225, -0.03000159, -0.024617586, 0.196627, 0.25832683, 0.10727102, -0.3850497, -0.14131209, 0.11651645, 0.28590408, -0.21434435, 0.18322834, -0.29039705, -0.3833519, -0.12992118, 0.20409176, 0.21254545, 0.17306952, -0.25796944, 0.18709883, -0.106259346, -0.43154985, -0.36494273, -0.11600778, 0.24581338, 0.18781164, 0.18708797, 0.25258115, 0.04334878, 0.11094503, 0.16887024, 0.18969853, -0.16395922, 0.17028388, -0.3653791, -0.062085465, -0.27042845, -0.20845734, -0.22975564, 0.38791245, -0.21153997, 0.22299886, 0.4067098, -0.3180967, -0.119020626, 0.16592985, 0.08134824, 0.07111609, -0.11281331, 0.21532886, 0.17626029, -0.11157626, 0.24869683, 0.002167793, 0.25803974, 0.17146061, 0.11219464, 0.14177255, 0.13628139, -0.14174575, 0.040052604, 0.0049264827, -0.027609568, -0.25137493, -0.14084421, 0.20105876, -0.058221187, 0.030821402, -0.15669239, -0.097033136, 0.02064872, 0.40998495, -0.3793549, 0.26364204, 0.09185891, 0.13761812, -0.23594798, -0.21576473, 0.064160846, 0.18441695, -0.39402333, 0.011562835, 0.15811875, 0.10871025, 0.2278601, 0.26277956, -0.011325682, -0.12331526, 0.4965102, -0.15407956, -0.13325658, 0.27157038, -0.25075117, -0.27684662, 0.28200155, -0.010683825, 0.30480832, 0.13320027, 0.031010821, 0.07333867, -0.6036074, 0.06996156, -0.4761334, -0.0033244402, 0.03359116, -0.097072996, -0.19554745, 0.15043412, 0.29090378, -0.24856627, -0.02541361, 0.2121456, 0.08295338, -0.09810014, 0.4901113, 
-0.022632308, 0.22354102, -0.060473014, 0.26167145, -0.21305074, 0.25782862, -0.26308486, -0.09908113, 0.007932383, 0.08765336, 0.052684747, -0.05878163, -0.31929326, 0.21060982, -0.006360022, -0.035593312, -0.04580954, 0.092614815, -0.010202253, 0.053435646, 0.06810382, 0.33141363, 0.23865972, -0.022269802, -0.35638827, -0.017010234, -0.09842304, 0.044951804, 0.013101696, 0.012481737, 0.4473684, -0.07812174, 0.008848828, -0.10374172, 0.26849997, 0.19759308, 0.14334072, 0.12648976, 0.085015506, 0.16048867, -0.04428103, -0.02241106, -0.15643533, -0.2261526, -0.27816802, 0.21147077, -0.23083511, -0.15736212, 0.16844489, 0.21279429, -0.116731785, 0.13067369, 0.3122624, 0.136315, -0.16236319, 0.266095, -0.118092746, 0.08463656, 0.3190953, -0.025155172, 0.17036186, 0.517092, 0.21172866, -0.37978435, -0.033721708, -0.20605312, -0.010164018, 0.260588, -0.16144288, 0.1739176, 0.37816438, 0.31039205, 0.44743317, -0.006673559, -0.11675396, 0.07569114, 0.20170745, 0.012370929, -0.17593956, -0.16927823, 0.26412624, 0.05698451, -0.14810121, -0.023506388, -0.16487631, 0.026122056, -0.14697117, -0.4044809, 0.04526173, 0.21642078, -0.4945617, 0.10950964, -0.29737392, 0.06892894, -0.2156567, 0.22239617, -0.23322712, -0.10665675, 0.39332575, -0.09363847, 0.050910737, -0.19002058, -0.1574706, 0.02213912, 0.036516193, -0.028725138, -0.017847586, 0.3557758, -0.12832177, 0.048198838, 4.3433104E-4, 0.22483422, -0.061355013, 0.18990348, 0.03564027, -0.1394448, -0.39040518, 0.15794353, -0.20287846, -0.4367065, -0.37070453, 0.35814124, -0.14648235, -0.26215193, -0.2346811, -0.27164423, 0.06335842, 0.18181078, 0.4646271, -0.39600596, -0.079446204, 0.49517226, -0.04852778, -0.16844611, 0.2967518, 0.19760415, -0.31121883, 0.34088603, 0.25469545, -0.018662168, 0.02773218, 0.51845956, 0.13568775, 0.18219014, -0.23326635, 0.46436208, -0.21790144, 0.31559125, -0.17255287, -0.20793658, -0.20586798, -0.001172778, 0.3308969, 0.17670253, -0.42809558, -0.10740692, 0.036743138, 0.35039994, -0.37736034, 
-0.06748311, 0.0051405686, -0.35943413, 0.1323299, 0.076594, 0.23542015, -0.38947168, -0.0032625643, 0.40785658, -0.3197368, 0.13108508, 0.31222928, 0.10504441, 0.376466, -0.039943226, 0.0075853164, 0.062115036, -0.23973337, -0.015673233, 0.124596596, 0.567843, 0.13057941, -0.40122274, 0.10246062, 0.25044444, -0.19272071, 0.3008771, -0.085480474, -0.04598082, 0.27369395, -0.024367249, 0.16280788, -0.09723148, -0.2331273, -0.31018296, 0.3681214, -0.20254564, -0.099116005, -0.16499473, -0.111495964, -0.13236894, 0.04578884, -0.36830792, 0.33352196, 0.118905395, -0.20436546, -0.08910314, -0.07557359, -0.15557393, -0.234037, -0.2726709, 0.43529606, -0.1588916, -0.44991276, 0.2639855, 0.03949102, 0.3693944, 0.016801286, 0.10393568, -0.069996394, 0.15449597, 0.08037712, -0.13697414, 0.28319156, 0.066741705, -0.5410128, -0.14622404, -0.2038448, 0.08338539, 0.2011268, -0.33919087, 0.04361721, 0.02101723, 0.14619805, 0.030959083, -0.09712685, -0.07434212, 0.403413, 0.22790873, 0.28365055, 0.09378035, 0.23105437, -0.029275687, -0.3173693, 0.0543687, 0.07409911, -0.2017265, 0.45038718, -0.08160741, -0.39471105, -0.066249356, 0.41151386, 0.08760141, 0.007330824, -0.066278905, 0.21011162, 0.1539203, -0.12142708, 0.1757174, -0.026292767, -0.14986737, -0.113564014, 0.07391096, -0.21108653, 0.06545459, -0.16722327, -0.006594374, -0.20676054, 0.016034845, -0.20714003, 0.2670048, -0.31555256, 0.10177538, 0.06680942, 0.28953385, -0.34272602, -0.1623317, -0.076802626, 0.15219814, 0.26613286, 0.34149784, 0.032789387, 0.028899977, -0.18126866, -0.2511501, 0.081135035, -0.21000236, 0.16121005, 0.08403291, 0.24493581, -0.32109466, -0.20124927, 0.20610592, -0.10798646, -0.14663638, 0.40788773, 0.24154414, 0.21223918, 0.0129725, 0.26252237, 0.025581531, -0.17492732, -0.12604713, -0.24954422, 0.07021474, -0.078310095, -0.051400866, -0.035224427, -0.11225072, -0.1952861, -0.17365551, 0.14578226, 0.14051022, 0.022287847, -0.057631906, -0.0139911715, -0.27890974, 0.32389244, 0.02819324, 
0.05819198, -0.069384664, 0.049535263, -0.13024889, 0.23608364, 0.21746805, 0.077531695, -0.19169594, -0.06775553, -0.27078623, -0.35225046, 0.058470953, 0.12146965, 0.13006094, -0.06770603, -0.25335953, 0.007816828, -0.13747253, 0.17829442, 0.011401831, -0.16540085, -0.103149, -0.069404215, -0.03443399, 0.077324726, -0.19909632, -0.20789692, -0.10723613, -0.08203292, -0.07713148, 0.34724632, -0.06089041, 0.2798024, -0.14503583, -0.0020078092, -0.15062527, 0.13015121, -0.065569915, 0.056575716, 0.25992274, -0.46167943, -0.15914112, -0.026965503, -0.20004211, -0.141356, -0.077509604, -0.027742084, 0.22565441, -0.36088252, 0.2413167, -0.12192323, 0.18409002, -0.08263175, -0.25700292, -0.16171978, -0.0079028085, 0.24911962, -0.3509631, -0.25993952, -0.2676477, -0.08899839, -0.06546339, -0.26874635, 0.42236093, -0.12012787, -0.08681204, 0.0034873153, 0.4458711, 0.20714682, 0.17381635, 0.20979515, -0.0273073, 0.03752274, 0.11206485, -0.46285358, 0.22496869, -0.2342749, -0.12822811, 0.0051306086, 0.109438226, -0.02305657, 0.030262345, -0.12668712, -0.11175425, 0.21646728, -0.37218267, -0.06411932, 0.27182007, 0.16698217, -0.26499093, 0.042440798, 0.13600044, 0.37879592, 0.11507201, -0.2173002, 0.1320668, -0.32382184, -0.037356067, -0.20496605, -0.2866237, 0.16485146, -0.081692345, 0.06725929, -0.10196793, -0.28608373, 0.21656291, -0.064518094, -0.07937422, 0.42675838, 0.0076278546, -0.12415507, 0.14044033, 0.014213737, 0.030048521, -0.0996337, 0.2774318, 0.20182906, -0.2818618, 0.1473123, -0.1531179, -0.04014678, -0.10299498]}] |-612640902 |[(1274183715,0.12201215887654807),(-1719102856,0.2991777399965483),(-1548374770,0.31909423657258823),(-1320876223,0.32308714836804664),(1293373212,0.3656377678477694),(1634839239,0.37472233818858686),(1510101612,0.3851201869623605)]|1274183715 |0.12201215887654807 |\n", + "|Fourth document, climate change is definitely one of the most pressing problems of our time. 
|[{document, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {sentence -> 0}, []}] |[{document, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {sentence -> 0}, []}] |[{token, 0, 5, Fourth, {sentence -> 0}, []}, {token, 7, 14, document, {sentence -> 0}, []}, {token, 15, 15, ,, {sentence -> 0}, []}, {token, 17, 23, climate, {sentence -> 0}, []}, {token, 25, 30, change, {sentence -> 0}, []}, {token, 32, 33, is, {sentence -> 0}, []}, {token, 35, 44, definitely, {sentence -> 0}, []}, {token, 46, 48, one, {sentence -> 0}, []}, {token, 50, 51, of, {sentence -> 0}, []}, {token, 53, 55, the, {sentence -> 0}, []}, {token, 57, 60, most, {sentence -> 0}, []}, {token, 62, 69, pressing, {sentence -> 0}, []}, {token, 71, 78, problems, {sentence -> 0}, []}, {token, 80, 81, of, {sentence -> 0}, []}, {token, 83, 85, our, {sentence -> 0}, []}, {token, 87, 90, time, {sentence -> 0}, []}, {token, 91, 91, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {sentence -> 0, token -> Fourth document, climate change is definitely one of the most pressing problems of our time., pieceId -> -1, isWordStart -> true}, [-0.003835852, -0.20862408, -0.21773167, -0.061703123, 0.14652315, 0.19786398, 0.26306406, -0.078452975, -0.08771903, -0.17226249, 0.22881703, 0.0019276487, -0.097814694, 0.09299635, -0.122063614, 0.4991608, 0.21783605, -0.4668241, 0.034899868, -0.032340027, -0.25079423, 0.06777912, 0.46978673, 0.32557002, 0.10782054, 0.074416175, -0.1351248, -0.043467775, 0.18473676, 0.24320771, 0.29688674, 0.063793264, 0.1008339, 0.23997188, -0.23376757, 0.06799241, -0.3010938, 0.016623985, 0.2497724, -0.21152967, -0.07613329, 0.165055, 0.21546067, -0.16450602, -0.104793414, 0.40502572, 0.22857304, 0.026289828, -0.12639655, -0.09120597, -0.36880243, 0.34113964, 0.29069296, 0.18020727, 
0.014091524, 0.015363324, -0.16181347, 0.23874591, -0.08743463, -0.100837186, -0.11651775, -0.21363984, -0.0059150266, -0.053012766, 0.025422096, -0.14887391, 0.099708736, -0.15484057, -0.13415912, 0.06851785, -0.0805267, 0.1470088, 0.16662681, -0.29883987, -0.26368806, 0.047838878, -0.5956105, -0.12314607, 0.28308532, 0.4328412, -0.114371665, 0.19504122, 0.052935146, 0.22930811, -0.01836126, -0.058793154, -0.05645759, -0.10266738, 0.17061166, 0.28478023, -0.20342122, -0.391638, 0.04970928, 0.026398072, -0.08933313, 0.0040505435, -0.010148193, -0.072514966, -0.17795344, -0.204243, 0.083572954, -0.27162114, -0.14674117, 0.25768456, -0.031521432, -0.2069687, -0.019544436, 0.30430898, 0.0742721, -0.094543956, -0.17090842, 0.41898516, 0.33163518, 1.12846494E-4, 0.028253844, 0.16404548, 0.123001106, -0.2959598, 0.41834822, -0.30666676, -0.018280396, -0.10533799, 0.13000785, 0.15551804, -0.2095328, 0.28947356, 0.1302748, 0.26099348, 0.19238193, 0.095461756, -0.034195073, 0.15064953, -0.11497837, 0.14000307, 0.21017769, 0.12219769, -0.007975813, -0.34788206, -0.2179525, 0.26906368, 0.35087898, 0.16659309, -0.048696965, 0.19371018, 0.10114278, 0.19875155, 0.14317547, -0.39720836, 0.045373674, 0.3482762, 0.11755119, 0.15139255, -0.07947107, -0.27557716, -0.25579336, -0.07741892, 0.05452082, -0.3229889, -0.1349494, 0.38116154, 0.035242334, -9.4695255E-4, -0.15345678, -0.20902577, -0.028087988, -0.12072554, 0.02527599, 0.094951235, -0.08846783, -0.4103613, -0.11496751, -0.5556833, -0.12119456, 0.19222125, -0.29915833, 0.24936001, -0.2900803, 0.09389836, 0.39451364, 0.052619856, -0.003266785, -0.1926063, -0.025942078, 0.13278645, 0.32179895, 0.24352162, -0.3960295, 0.09980806, 0.17284764, 0.25954893, 0.14217232, -0.04101023, -0.13483965, 0.13977908, -0.18719734, 0.18095459, -0.21917875, 0.1663283, -0.26652965, -0.21226367, 0.27464914, -0.40665168, -0.026470223, 0.07333406, 0.26849043, 7.350019E-4, -0.056252368, -0.08676217, 0.09196398, 0.18072756, 0.14173715, -0.399947, 
0.27917507, -0.026699087, -0.034646686, -0.0265695, 0.19165027, 0.25528798, 0.10507765, -0.37910807, -0.14147788, 0.1213717, 0.28017974, -0.2178526, 0.17398752, -0.29096332, -0.3853184, -0.13676582, 0.20780607, 0.21252154, 0.16414383, -0.2619306, 0.18530105, -0.099988185, -0.437557, -0.36251107, -0.11280457, 0.2483923, 0.18430209, 0.19254051, 0.25591743, 0.044537775, 0.11579968, 0.16134737, 0.18732512, -0.1632906, 0.16494152, -0.36760136, -0.063819736, -0.26302108, -0.2046593, -0.2226854, 0.39319545, -0.20637247, 0.2204216, 0.39861265, -0.3154902, -0.110166, 0.15821502, 0.08322516, 0.07195477, -0.11205203, 0.21377234, 0.1727777, -0.1082272, 0.24539624, 0.0072368, 0.255411, 0.18038207, 0.113827385, 0.13634266, 0.13769454, -0.14098604, 0.029769506, 1.07247615E-4, -0.027630894, -0.24913825, -0.14263271, 0.1956553, -0.054647956, 0.025447316, -0.16555777, -0.09804624, 0.021545487, 0.4063341, -0.37619784, 0.25973955, 0.0909772, 0.1343971, -0.2340182, -0.21328466, 0.06330135, 0.17917435, -0.39460728, 0.012819739, 0.15837549, 0.10605624, 0.21922973, 0.25763094, -0.011688642, -0.1233857, 0.49162617, -0.15246215, -0.12871982, 0.2749578, -0.24912183, -0.27655655, 0.27614963, -0.015543518, 0.30740818, 0.12660035, 0.034775224, 0.07287423, -0.6020512, 0.0732433, -0.48010373, -0.0063999984, 0.02299516, -0.0973719, -0.194734, 0.15007432, 0.2863854, -0.2561826, -0.02701807, 0.21356896, 0.08775118, -0.1012267, 0.48577482, -0.0175645, 0.22016966, -0.062453557, 0.25920734, -0.2079024, 0.25610057, -0.2586762, -0.10540434, 0.0039830296, 0.08733931, 0.050882008, -0.052359536, -0.31297603, 0.2106775, -0.0026711975, -0.04521307, -0.04683987, 0.090368256, -0.012594639, 0.06028535, 0.06301103, 0.33172685, 0.22572477, -0.019447142, -0.35718563, -0.00800849, -0.097327664, 0.042126693, 0.013663563, 0.012726291, 0.44344866, -0.08289011, 0.007116606, -0.11163828, 0.26574427, 0.18960999, 0.1414628, 0.128762, 0.078481935, 0.157456, -0.05116457, -0.026563864, -0.15368989, -0.21608125, -0.27050465, 
0.21623424, -0.23023556, -0.16277516, 0.16495888, 0.21445934, -0.111836694, 0.13322864, 0.30657476, 0.13720067, -0.159983, 0.26702243, -0.12482961, 0.09016377, 0.32040662, -0.018563824, 0.16497932, 0.5124235, 0.20532905, -0.3814022, -0.03317346, -0.20309982, -0.0040927846, 0.2560035, -0.1638267, 0.1806096, 0.38001326, 0.30242908, 0.44804883, -0.0075148335, -0.12329148, 0.08455233, 0.19882831, 0.008244825, -0.17403102, -0.17037638, 0.26745683, 0.059498128, -0.14675348, -0.02704845, -0.15730269, 0.027009208, -0.14835586, -0.3984341, 0.04951467, 0.21336877, -0.49393272, 0.10631875, -0.29439995, 0.061264765, -0.21304032, 0.21745941, -0.23458827, -0.10586627, 0.39240077, -0.09479013, 0.053299956, -0.1869098, -0.15625404, 0.020766083, 0.037299443, -0.023929408, -0.020717165, 0.35108542, -0.12770072, 0.048128624, -0.0012627487, 0.21702303, -0.059279095, 0.19388442, 0.03769969, -0.14153397, -0.38913018, 0.13990252, -0.21227513, -0.433339, -0.3663871, 0.36315858, -0.14353605, -0.26056486, -0.22842707, -0.26850614, 0.06737485, 0.18009762, 0.46545935, -0.3985722, -0.077369705, 0.49015, -0.053387765, -0.16854678, 0.29057407, 0.1913427, -0.3114105, 0.33391303, 0.2558109, -0.023194117, 0.018415831, 0.51090944, 0.13048178, 0.17275383, -0.22784911, 0.46020842, -0.21511106, 0.31633744, -0.17009853, -0.20447387, -0.209933, -0.004602926, 0.32671034, 0.17847407, -0.42724732, -0.10567779, 0.038091425, 0.34635022, -0.36916775, -0.06940825, 0.0031484652, -0.35126984, 0.12976545, 0.07222441, 0.22948559, -0.38525584, -0.0054675937, 0.4029358, -0.3128454, 0.13479385, 0.30039707, 0.10271079, 0.37128493, -0.039665103, 0.0044320114, 0.05975092, -0.23670478, -0.014700713, 0.12472548, 0.56054676, 0.12695724, -0.39929366, 0.09567339, 0.24853627, -0.18731521, 0.29644012, -0.08412853, -0.047205262, 0.2728614, -0.02490702, 0.15606382, -0.097337656, -0.22999285, -0.31338483, 0.36374938, -0.19818754, -0.099194676, -0.16242108, -0.11241214, -0.13800095, 0.04759648, -0.36923477, 0.3377149, 0.119674034, 
-0.19736561, -0.08515725, -0.08039907, -0.14816573, -0.23759411, -0.27405044, 0.4255974, -0.16980892, -0.45103624, 0.2592508, 0.041907087, 0.36110488, 0.024747934, 0.10244622, -0.059871107, 0.15316144, 0.0838079, -0.13005643, 0.28822184, 0.0665008, -0.5368563, -0.15151228, -0.2019773, 0.08225291, 0.192799, -0.33645836, 0.038935043, 0.018619873, 0.14761768, 0.025234457, -0.09481421, -0.075215675, 0.4102618, 0.2266266, 0.2828633, 0.09196845, 0.23114052, -0.027229344, -0.32195765, 0.05286099, 0.07850093, -0.20198047, 0.44307065, -0.0828929, -0.3885311, -0.0633556, 0.39846793, 0.08895412, 0.0034600352, -0.05967322, 0.20862308, 0.15983063, -0.12449649, 0.17403392, -0.028444685, -0.14034821, -0.11273413, 0.074709274, -0.21595575, 0.06382918, -0.15581375, -0.0037954638, -0.2143301, 0.02041007, -0.20123285, 0.2582673, -0.31853202, 0.1053178, 0.06505883, 0.2866883, -0.34562057, -0.1629475, -0.074389875, 0.15515953, 0.26866287, 0.3449197, 0.031800583, 0.023580268, -0.1726088, -0.25076672, 0.081279226, -0.20510998, 0.1600836, 0.085919306, 0.24515575, -0.31630135, -0.19066125, 0.20611435, -0.09954545, -0.14281541, 0.4028987, 0.24429737, 0.21244703, 0.019491952, 0.2653371, 0.026023164, -0.17607515, -0.121807344, -0.24455278, 0.06749151, -0.074221194, -0.04900035, -0.03342888, -0.115426466, -0.18984312, -0.17625487, 0.1499366, 0.1351458, 0.017796097, -0.05602583, -0.017981825, -0.27410272, 0.31989276, 0.031326815, 0.061995383, -0.065489784, 0.04399158, -0.13519874, 0.23140492, 0.21671903, 0.09083854, -0.18631245, -0.057688475, -0.27705774, -0.35227805, 0.056066796, 0.1273246, 0.12778823, -0.07574217, -0.24935283, 0.008509332, -0.13331816, 0.17538325, 0.009673233, -0.16985297, -0.1068587, -0.06615289, -0.03307501, 0.07584848, -0.19225228, -0.20574294, -0.11182022, -0.08309786, -0.08347798, 0.35421437, -0.05453685, 0.27538902, -0.14582933, -0.0071586724, -0.1518315, 0.12785475, -0.065459505, 0.063663825, 0.24955855, -0.4544611, -0.16337994, -0.029619344, -0.1973021, -0.14364167, 
-0.079227276, -0.02981196, 0.2262998, -0.3568656, 0.2402911, -0.120060846, 0.19488864, -0.07728642, -0.2543439, -0.15293853, -0.009106492, 0.24851386, -0.34600765, -0.25706416, -0.2659431, -0.08471125, -0.059878696, -0.27150732, 0.42241326, -0.11469594, -0.08416896, 0.0047065616, 0.45044258, 0.20738047, 0.17053586, 0.20957811, -0.029988201, 0.040661167, 0.10623518, -0.4656837, 0.22994503, -0.23307306, -0.11735744, 0.0035948404, 0.10942622, -0.021296069, 0.022905214, -0.124894835, -0.10581059, 0.21904148, -0.3748968, -0.0631806, 0.271241, 0.1652423, -0.2599846, 0.038464893, 0.13197884, 0.38681895, 0.11253837, -0.21754825, 0.13165388, -0.32750425, -0.028989421, -0.20510201, -0.28893355, 0.15721151, -0.08457131, 0.070540816, -0.09493317, -0.28037602, 0.20949727, -0.066243395, -0.08535814, 0.42199194, 0.0021733278, -0.11638748, 0.13930771, 0.008724968, 0.032127243, -0.09387868, 0.2737223, 0.20442082, -0.2744895, 0.14772631, -0.154034, -0.040445644, -0.10262055]}] |[{doc_similarity_rankings, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {pieceId -> -1, lshId -> 1274183715, isWordStart -> true, token -> Fourth document, climate change is definitely one of the most pressing problems of our time., lshNeighbors -> [(-612640902,0.12201215887654807),(-1719102856,0.28519768414650126),(-1320876223,0.3148226377437515),(-1548374770,0.3181712969830674),(1293373212,0.34962777859773414),(1634839239,0.3554576544360366),(1510101612,0.36788497133113396)], sentence -> 0}, [-0.003835852, -0.20862408, -0.21773167, -0.061703123, 0.14652315, 0.19786398, 0.26306406, -0.078452975, -0.08771903, -0.17226249, 0.22881703, 0.0019276487, -0.097814694, 0.09299635, -0.122063614, 0.4991608, 0.21783605, -0.4668241, 0.034899868, -0.032340027, -0.25079423, 0.06777912, 0.46978673, 0.32557002, 0.10782054, 0.074416175, -0.1351248, -0.043467775, 0.18473676, 0.24320771, 0.29688674, 0.063793264, 0.1008339, 0.23997188, -0.23376757, 0.06799241, -0.3010938, 
0.016623985, 0.2497724, -0.21152967, -0.07613329, 0.165055, 0.21546067, -0.16450602, -0.104793414, 0.40502572, 0.22857304, 0.026289828, -0.12639655, -0.09120597, -0.36880243, 0.34113964, 0.29069296, 0.18020727, 0.014091524, 0.015363324, -0.16181347, 0.23874591, -0.08743463, -0.100837186, -0.11651775, -0.21363984, -0.0059150266, -0.053012766, 0.025422096, -0.14887391, 0.099708736, -0.15484057, -0.13415912, 0.06851785, -0.0805267, 0.1470088, 0.16662681, -0.29883987, -0.26368806, 0.047838878, -0.5956105, -0.12314607, 0.28308532, 0.4328412, -0.114371665, 0.19504122, 0.052935146, 0.22930811, -0.01836126, -0.058793154, -0.05645759, -0.10266738, 0.17061166, 0.28478023, -0.20342122, -0.391638, 0.04970928, 0.026398072, -0.08933313, 0.0040505435, -0.010148193, -0.072514966, -0.17795344, -0.204243, 0.083572954, -0.27162114, -0.14674117, 0.25768456, -0.031521432, -0.2069687, -0.019544436, 0.30430898, 0.0742721, -0.094543956, -0.17090842, 0.41898516, 0.33163518, 1.12846494E-4, 0.028253844, 0.16404548, 0.123001106, -0.2959598, 0.41834822, -0.30666676, -0.018280396, -0.10533799, 0.13000785, 0.15551804, -0.2095328, 0.28947356, 0.1302748, 0.26099348, 0.19238193, 0.095461756, -0.034195073, 0.15064953, -0.11497837, 0.14000307, 0.21017769, 0.12219769, -0.007975813, -0.34788206, -0.2179525, 0.26906368, 0.35087898, 0.16659309, -0.048696965, 0.19371018, 0.10114278, 0.19875155, 0.14317547, -0.39720836, 0.045373674, 0.3482762, 0.11755119, 0.15139255, -0.07947107, -0.27557716, -0.25579336, -0.07741892, 0.05452082, -0.3229889, -0.1349494, 0.38116154, 0.035242334, -9.4695255E-4, -0.15345678, -0.20902577, -0.028087988, -0.12072554, 0.02527599, 0.094951235, -0.08846783, -0.4103613, -0.11496751, -0.5556833, -0.12119456, 0.19222125, -0.29915833, 0.24936001, -0.2900803, 0.09389836, 0.39451364, 0.052619856, -0.003266785, -0.1926063, -0.025942078, 0.13278645, 0.32179895, 0.24352162, -0.3960295, 0.09980806, 0.17284764, 0.25954893, 0.14217232, -0.04101023, -0.13483965, 0.13977908, -0.18719734, 
0.18095459, -0.21917875, 0.1663283, -0.26652965, -0.21226367, 0.27464914, -0.40665168, -0.026470223, 0.07333406, 0.26849043, 7.350019E-4, -0.056252368, -0.08676217, 0.09196398, 0.18072756, 0.14173715, -0.399947, 0.27917507, -0.026699087, -0.034646686, -0.0265695, 0.19165027, 0.25528798, 0.10507765, -0.37910807, -0.14147788, 0.1213717, 0.28017974, -0.2178526, 0.17398752, -0.29096332, -0.3853184, -0.13676582, 0.20780607, 0.21252154, 0.16414383, -0.2619306, 0.18530105, -0.099988185, -0.437557, -0.36251107, -0.11280457, 0.2483923, 0.18430209, 0.19254051, 0.25591743, 0.044537775, 0.11579968, 0.16134737, 0.18732512, -0.1632906, 0.16494152, -0.36760136, -0.063819736, -0.26302108, -0.2046593, -0.2226854, 0.39319545, -0.20637247, 0.2204216, 0.39861265, -0.3154902, -0.110166, 0.15821502, 0.08322516, 0.07195477, -0.11205203, 0.21377234, 0.1727777, -0.1082272, 0.24539624, 0.0072368, 0.255411, 0.18038207, 0.113827385, 0.13634266, 0.13769454, -0.14098604, 0.029769506, 1.07247615E-4, -0.027630894, -0.24913825, -0.14263271, 0.1956553, -0.054647956, 0.025447316, -0.16555777, -0.09804624, 0.021545487, 0.4063341, -0.37619784, 0.25973955, 0.0909772, 0.1343971, -0.2340182, -0.21328466, 0.06330135, 0.17917435, -0.39460728, 0.012819739, 0.15837549, 0.10605624, 0.21922973, 0.25763094, -0.011688642, -0.1233857, 0.49162617, -0.15246215, -0.12871982, 0.2749578, -0.24912183, -0.27655655, 0.27614963, -0.015543518, 0.30740818, 0.12660035, 0.034775224, 0.07287423, -0.6020512, 0.0732433, -0.48010373, -0.0063999984, 0.02299516, -0.0973719, -0.194734, 0.15007432, 0.2863854, -0.2561826, -0.02701807, 0.21356896, 0.08775118, -0.1012267, 0.48577482, -0.0175645, 0.22016966, -0.062453557, 0.25920734, -0.2079024, 0.25610057, -0.2586762, -0.10540434, 0.0039830296, 0.08733931, 0.050882008, -0.052359536, -0.31297603, 0.2106775, -0.0026711975, -0.04521307, -0.04683987, 0.090368256, -0.012594639, 0.06028535, 0.06301103, 0.33172685, 0.22572477, -0.019447142, -0.35718563, -0.00800849, -0.097327664, 0.042126693, 
0.013663563, 0.012726291, 0.44344866, -0.08289011, 0.007116606, -0.11163828, 0.26574427, 0.18960999, 0.1414628, 0.128762, 0.078481935, 0.157456, -0.05116457, -0.026563864, -0.15368989, -0.21608125, -0.27050465, 0.21623424, -0.23023556, -0.16277516, 0.16495888, 0.21445934, -0.111836694, 0.13322864, 0.30657476, 0.13720067, -0.159983, 0.26702243, -0.12482961, 0.09016377, 0.32040662, -0.018563824, 0.16497932, 0.5124235, 0.20532905, -0.3814022, -0.03317346, -0.20309982, -0.0040927846, 0.2560035, -0.1638267, 0.1806096, 0.38001326, 0.30242908, 0.44804883, -0.0075148335, -0.12329148, 0.08455233, 0.19882831, 0.008244825, -0.17403102, -0.17037638, 0.26745683, 0.059498128, -0.14675348, -0.02704845, -0.15730269, 0.027009208, -0.14835586, -0.3984341, 0.04951467, 0.21336877, -0.49393272, 0.10631875, -0.29439995, 0.061264765, -0.21304032, 0.21745941, -0.23458827, -0.10586627, 0.39240077, -0.09479013, 0.053299956, -0.1869098, -0.15625404, 0.020766083, 0.037299443, -0.023929408, -0.020717165, 0.35108542, -0.12770072, 0.048128624, -0.0012627487, 0.21702303, -0.059279095, 0.19388442, 0.03769969, -0.14153397, -0.38913018, 0.13990252, -0.21227513, -0.433339, -0.3663871, 0.36315858, -0.14353605, -0.26056486, -0.22842707, -0.26850614, 0.06737485, 0.18009762, 0.46545935, -0.3985722, -0.077369705, 0.49015, -0.053387765, -0.16854678, 0.29057407, 0.1913427, -0.3114105, 0.33391303, 0.2558109, -0.023194117, 0.018415831, 0.51090944, 0.13048178, 0.17275383, -0.22784911, 0.46020842, -0.21511106, 0.31633744, -0.17009853, -0.20447387, -0.209933, -0.004602926, 0.32671034, 0.17847407, -0.42724732, -0.10567779, 0.038091425, 0.34635022, -0.36916775, -0.06940825, 0.0031484652, -0.35126984, 0.12976545, 0.07222441, 0.22948559, -0.38525584, -0.0054675937, 0.4029358, -0.3128454, 0.13479385, 0.30039707, 0.10271079, 0.37128493, -0.039665103, 0.0044320114, 0.05975092, -0.23670478, -0.014700713, 0.12472548, 0.56054676, 0.12695724, -0.39929366, 0.09567339, 0.24853627, -0.18731521, 0.29644012, -0.08412853, 
-0.047205262, 0.2728614, -0.02490702, 0.15606382, -0.097337656, -0.22999285, -0.31338483, 0.36374938, -0.19818754, -0.099194676, -0.16242108, -0.11241214, -0.13800095, 0.04759648, -0.36923477, 0.3377149, 0.119674034, -0.19736561, -0.08515725, -0.08039907, -0.14816573, -0.23759411, -0.27405044, 0.4255974, -0.16980892, -0.45103624, 0.2592508, 0.041907087, 0.36110488, 0.024747934, 0.10244622, -0.059871107, 0.15316144, 0.0838079, -0.13005643, 0.28822184, 0.0665008, -0.5368563, -0.15151228, -0.2019773, 0.08225291, 0.192799, -0.33645836, 0.038935043, 0.018619873, 0.14761768, 0.025234457, -0.09481421, -0.075215675, 0.4102618, 0.2266266, 0.2828633, 0.09196845, 0.23114052, -0.027229344, -0.32195765, 0.05286099, 0.07850093, -0.20198047, 0.44307065, -0.0828929, -0.3885311, -0.0633556, 0.39846793, 0.08895412, 0.0034600352, -0.05967322, 0.20862308, 0.15983063, -0.12449649, 0.17403392, -0.028444685, -0.14034821, -0.11273413, 0.074709274, -0.21595575, 0.06382918, -0.15581375, -0.0037954638, -0.2143301, 0.02041007, -0.20123285, 0.2582673, -0.31853202, 0.1053178, 0.06505883, 0.2866883, -0.34562057, -0.1629475, -0.074389875, 0.15515953, 0.26866287, 0.3449197, 0.031800583, 0.023580268, -0.1726088, -0.25076672, 0.081279226, -0.20510998, 0.1600836, 0.085919306, 0.24515575, -0.31630135, -0.19066125, 0.20611435, -0.09954545, -0.14281541, 0.4028987, 0.24429737, 0.21244703, 0.019491952, 0.2653371, 0.026023164, -0.17607515, -0.121807344, -0.24455278, 0.06749151, -0.074221194, -0.04900035, -0.03342888, -0.115426466, -0.18984312, -0.17625487, 0.1499366, 0.1351458, 0.017796097, -0.05602583, -0.017981825, -0.27410272, 0.31989276, 0.031326815, 0.061995383, -0.065489784, 0.04399158, -0.13519874, 0.23140492, 0.21671903, 0.09083854, -0.18631245, -0.057688475, -0.27705774, -0.35227805, 0.056066796, 0.1273246, 0.12778823, -0.07574217, -0.24935283, 0.008509332, -0.13331816, 0.17538325, 0.009673233, -0.16985297, -0.1068587, -0.06615289, -0.03307501, 0.07584848, -0.19225228, -0.20574294, -0.11182022, 
-0.08309786, -0.08347798, 0.35421437, -0.05453685, 0.27538902, -0.14582933, -0.0071586724, -0.1518315, 0.12785475, -0.065459505, 0.063663825, 0.24955855, -0.4544611, -0.16337994, -0.029619344, -0.1973021, -0.14364167, -0.079227276, -0.02981196, 0.2262998, -0.3568656, 0.2402911, -0.120060846, 0.19488864, -0.07728642, -0.2543439, -0.15293853, -0.009106492, 0.24851386, -0.34600765, -0.25706416, -0.2659431, -0.08471125, -0.059878696, -0.27150732, 0.42241326, -0.11469594, -0.08416896, 0.0047065616, 0.45044258, 0.20738047, 0.17053586, 0.20957811, -0.029988201, 0.040661167, 0.10623518, -0.4656837, 0.22994503, -0.23307306, -0.11735744, 0.0035948404, 0.10942622, -0.021296069, 0.022905214, -0.124894835, -0.10581059, 0.21904148, -0.3748968, -0.0631806, 0.271241, 0.1652423, -0.2599846, 0.038464893, 0.13197884, 0.38681895, 0.11253837, -0.21754825, 0.13165388, -0.32750425, -0.028989421, -0.20510201, -0.28893355, 0.15721151, -0.08457131, 0.070540816, -0.09493317, -0.28037602, 0.20949727, -0.066243395, -0.08535814, 0.42199194, 0.0021733278, -0.11638748, 0.13930771, 0.008724968, 0.032127243, -0.09387868, 0.2737223, 0.20442082, -0.2744895, 0.14772631, -0.154034, -0.040445644, -0.10262055]}] |1274183715 |[(-612640902,0.12201215887654807),(-1719102856,0.28519768414650126),(-1320876223,0.3148226377437515),(-1548374770,0.3181712969830674),(1293373212,0.34962777859773414),(1634839239,0.3554576544360366),(1510101612,0.36788497133113396)]|-612640902 |0.12201215887654807 |\n", + "|Fifth document, Florence in Italy, is among the most beautiful cities in Europe. 
|[{document, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {sentence -> 0}, []}] |[{document, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {sentence -> 0}, []}] |[{token, 0, 4, Fifth, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 23, Florence, {sentence -> 0}, []}, {token, 25, 26, in, {sentence -> 0}, []}, {token, 28, 32, Italy, {sentence -> 0}, []}, {token, 33, 33, ,, {sentence -> 0}, []}, {token, 35, 36, is, {sentence -> 0}, []}, {token, 38, 42, among, {sentence -> 0}, []}, {token, 44, 46, the, {sentence -> 0}, []}, {token, 48, 51, most, {sentence -> 0}, []}, {token, 53, 61, beautiful, {sentence -> 0}, []}, {token, 63, 68, cities, {sentence -> 0}, []}, {token, 70, 71, in, {sentence -> 0}, []}, {token, 73, 78, Europe, {sentence -> 0}, []}, {token, 79, 79, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {sentence -> 0, token -> Fifth document, Florence in Italy, is among the most beautiful cities in Europe., pieceId -> -1, isWordStart -> true}, [-0.017528567, -0.20887347, -0.21519172, -0.03888638, 0.13357492, 0.18307811, 0.25547925, -0.08337439, -0.093116276, -0.16162084, 0.23401257, -0.013713108, -0.12097579, 0.09835637, -0.13103157, 0.49466628, 0.2324918, -0.45448515, 0.019689137, -0.04135379, -0.2731323, 0.04852046, 0.45949715, 0.31224954, 0.09636614, 0.07835619, -0.14175831, -0.027474394, 0.19975916, 0.24123047, 0.29196763, 0.067277946, 0.11105793, 0.25373426, -0.24394739, 0.06752376, -0.3060502, 9.322982E-4, 0.23657267, -0.20978928, -0.07832431, 0.17510965, 0.22120374, -0.17409658, -0.10543208, 0.41644603, 0.2284994, 0.019994497, -0.1206437, -0.10357072, -0.36405995, 0.34403795, 0.2979404, 0.18125014, 0.018748851, 0.04012803, -0.1733186, 0.24439529, -0.08001437, -0.11709171, -0.12347519, -0.21569543, 
-0.026534403, -0.050762545, 0.008339827, -0.14291511, 0.09049857, -0.16389105, -0.14423226, 0.061702, -0.095125, 0.12417953, 0.16685584, -0.3054142, -0.28169885, 0.050286457, -0.6093201, -0.122661844, 0.2895588, 0.4279639, -0.13902375, 0.21143883, 0.051323365, 0.23632784, -0.01041111, -0.04957247, -0.055871606, -0.106259845, 0.18611698, 0.28124726, -0.2007103, -0.38195023, 0.06384789, 0.01629718, -0.0945481, -0.010838091, -0.04080201, -0.054464836, -0.17945004, -0.21212989, 0.07180114, -0.27967128, -0.1390803, 0.2592707, -0.03620937, -0.19366772, -0.026854295, 0.29872236, 0.07377065, -0.09859825, -0.16164072, 0.43511766, 0.33360565, 0.00644084, 0.021297934, 0.17641163, 0.109603375, -0.2808542, 0.4100023, -0.30830663, -0.02324736, -0.12787724, 0.13755724, 0.14575775, -0.21397276, 0.27359906, 0.1360874, 0.2805497, 0.18917772, 0.082572766, -0.03715889, 0.14764093, -0.116034165, 0.14106748, 0.21679801, 0.12502475, -0.017175207, -0.351808, -0.21835403, 0.26628754, 0.364595, 0.17638803, -0.041836392, 0.18179081, 0.108427785, 0.22434832, 0.15892959, -0.39827198, 0.044703104, 0.3359178, 0.13377586, 0.15026097, -0.08827668, -0.26545244, -0.24913852, -0.08579704, 0.05721378, -0.31592715, -0.14231163, 0.38532966, 0.036052644, -0.0027677694, -0.15842925, -0.21617033, -0.03264196, -0.13026895, 0.05327642, 0.08736663, -0.09196017, -0.41138726, -0.090421245, -0.5568038, -0.117088675, 0.1952759, -0.2986329, 0.24587378, -0.28948125, 0.083854966, 0.3666834, 0.025861917, -0.0017132398, -0.18599303, -0.03876378, 0.12794247, 0.3335259, 0.22762723, -0.39386296, 0.08550472, 0.14752512, 0.2544776, 0.13983288, -0.030865967, -0.13050406, 0.124644294, -0.19920404, 0.18511117, -0.20913021, 0.18329288, -0.26674783, -0.20118403, 0.2890029, -0.40576503, -0.040320467, 0.09285337, 0.2520434, 0.0034789476, -0.042712186, -0.07351959, 0.08958212, 0.18637295, 0.15107888, -0.3961306, 0.2681504, -0.021723367, -0.024320226, -0.031113315, 0.19820504, 0.26775804, 0.13101543, -0.39684162, -0.1377801, 
0.104406044, 0.28336167, -0.22840387, 0.16368443, -0.29106283, -0.38266584, -0.13229474, 0.20744194, 0.22572315, 0.16587397, -0.27363026, 0.1919627, -0.088201866, -0.42006725, -0.37401968, -0.11527952, 0.23918867, 0.19769923, 0.17820881, 0.2648834, 0.011358881, 0.12344342, 0.17440955, 0.1688173, -0.15462974, 0.18139134, -0.37719452, -0.07280123, -0.26092157, -0.19534999, -0.23901841, 0.3925651, -0.2268674, 0.2323894, 0.38537943, -0.32429454, -0.094378285, 0.16097061, 0.084330045, 0.07674305, -0.100616775, 0.20368801, 0.18317987, -0.10215017, 0.25102583, 0.013015796, 0.2618376, 0.19005089, 0.10934586, 0.121803135, 0.14295664, -0.13193665, 0.0531183, 0.017180707, -0.027863244, -0.26400146, -0.1470274, 0.21853186, -0.02715699, 0.027724234, -0.1564268, -0.102942735, 0.016841251, 0.40123186, -0.37726215, 0.26930076, 0.08578669, 0.1325882, -0.24705328, -0.21208054, 0.068617605, 0.18242663, -0.39056832, -0.01681968, 0.1632192, 0.10309466, 0.21451901, 0.25458825, 0.0111891525, -0.11184347, 0.4824411, -0.15262514, -0.12453984, 0.26980564, -0.24968751, -0.2749819, 0.27711532, -0.024115924, 0.30004704, 0.11883662, 0.026279865, 0.08240666, -0.6025861, 0.07997487, -0.46908012, 0.00318713, 0.017263228, -0.08420625, -0.20542656, 0.15257843, 0.28043687, -0.2576107, -0.029525554, 0.22024228, 0.08719829, -0.10169949, 0.47289637, -0.0070210746, 0.21146531, -0.09518342, 0.25460428, -0.22287658, 0.27640212, -0.2639986, -0.09115377, 0.012192329, 0.11418934, 0.042151, -0.062401574, -0.3224692, 0.21616587, -0.009924554, -0.03401868, -0.06122483, 0.09629407, 0.0010092515, 0.04388869, 0.06199242, 0.31550166, 0.21101683, -0.029377753, -0.36951146, -0.0112636825, -0.102699466, 0.035519373, 0.015170618, -0.014788982, 0.4407807, -0.09595226, 0.018110882, -0.13322428, 0.25438005, 0.20591274, 0.13240317, 0.1370632, 0.07530696, 0.15535797, -0.054800827, -0.00992456, -0.15799199, -0.22623523, -0.27510443, 0.21811266, -0.22423144, -0.1767162, 0.18349846, 0.22506163, -0.13641146, 0.12888514, 
0.32516634, 0.116478354, -0.14479439, 0.2633992, -0.101380765, 0.08838486, 0.31893408, -0.022981562, 0.15893175, 0.51558924, 0.21190989, -0.36944315, -0.048417248, -0.22201133, -0.013516225, 0.26638502, -0.15765886, 0.1851696, 0.38429335, 0.30839187, 0.44408417, 0.016373433, -0.111148976, 0.09977997, 0.21016383, 0.018669449, -0.16498841, -0.15592425, 0.26489583, 0.06269013, -0.15986864, -0.03969585, -0.13937603, 0.031850375, -0.13655634, -0.40000895, 0.05436843, 0.21910602, -0.4996334, 0.086884744, -0.29076114, 0.05204509, -0.22826457, 0.20508406, -0.2507804, -0.10823102, 0.38743898, -0.100984976, 0.058472827, -0.16382112, -0.16156591, 0.023903372, 0.033624973, -0.023788728, -0.018010495, 0.3690815, -0.09954051, 0.052623548, 0.004742802, 0.19814914, -0.06711465, 0.17929058, 0.03160398, -0.1334006, -0.36719155, 0.13863695, -0.22021416, -0.4222436, -0.35124636, 0.36573943, -0.14639676, -0.26974335, -0.21589987, -0.2655588, 0.06684742, 0.17689691, 0.44980702, -0.4014982, -0.09032181, 0.48900092, -0.07084343, -0.1657361, 0.29341906, 0.21496047, -0.3124336, 0.3256341, 0.26760855, -0.04306482, 0.025937881, 0.5136517, 0.13405447, 0.18719737, -0.22120632, 0.4532139, -0.22904672, 0.32023016, -0.17113322, -0.214062, -0.19590136, -0.004523295, 0.3206988, 0.18094444, -0.4241107, -0.09262745, 0.01998503, 0.3636845, -0.36537707, -0.064887315, -0.0055596177, -0.34393403, 0.14510506, 0.06153031, 0.21042548, -0.37673673, 0.00559097, 0.3951263, -0.3184028, 0.13477531, 0.32866392, 0.11474525, 0.36869153, -0.041948557, 0.0048171515, 0.060693625, -0.23275205, 0.0050272522, 0.10768602, 0.5644927, 0.16615626, -0.408257, 0.09772438, 0.2569018, -0.17795925, 0.30351314, -0.08614802, -0.053504214, 0.26501653, -0.03555109, 0.15758383, -0.09940738, -0.24023113, -0.31173047, 0.38147378, -0.21063639, -0.09774987, -0.1817993, -0.11774616, -0.14214633, 0.059035324, -0.3725449, 0.3340544, 0.13414147, -0.19714779, -0.09575635, -0.09342445, -0.14242783, -0.22104931, -0.26635736, 0.4439432, 
-0.15269086, -0.45180112, 0.26066065, 0.040218845, 0.35135338, 0.042087253, 0.11012991, -0.070131294, 0.16262923, 0.093960635, -0.13531551, 0.27414426, 0.06098134, -0.5368306, -0.14256248, -0.237552, 0.084959276, 0.18812361, -0.33930442, 0.038618933, 0.020493202, 0.12378717, 0.025085747, -0.091824085, -0.07121754, 0.39458132, 0.23002088, 0.2822764, 0.09145013, 0.21399665, -0.044916313, -0.33112302, 0.037864167, 0.07553057, -0.20884651, 0.44166437, -0.08572297, -0.39234006, -0.072089285, 0.40756452, 0.09254245, 0.009302463, -0.047768205, 0.22046876, 0.16218755, -0.13332985, 0.18362974, -0.019280719, -0.12338676, -0.114508614, 0.05637966, -0.22382355, 0.056215905, -0.15588409, -0.0018170164, -0.21711601, 0.01625738, -0.19152458, 0.2638185, -0.3450164, 0.08638652, 0.06305374, 0.27371433, -0.35860744, -0.17530364, -0.08452919, 0.16955557, 0.28973067, 0.34323737, 0.01695178, 0.011290298, -0.1810764, -0.22997546, 0.059361335, -0.22219576, 0.1413909, 0.06546778, 0.24282089, -0.32794768, -0.17942922, 0.18658105, -0.097890414, -0.1513686, 0.39975017, 0.25039408, 0.21449175, 0.024416223, 0.27124867, 4.2602097E-4, -0.18044855, -0.13447432, -0.2740434, 0.07203264, -0.07980538, -0.07517155, -0.05881436, -0.12858282, -0.20378308, -0.16090913, 0.15564685, 0.13000391, 0.024031563, -0.057775024, -0.0034629763, -0.2688574, 0.32077903, 0.028848177, 0.047397353, -0.05958363, 0.04970749, -0.13669938, 0.22241849, 0.19625284, 0.08440484, -0.18950449, -0.07067425, -0.2850016, -0.36458367, 0.057933036, 0.13580492, 0.108672015, -0.07561426, -0.2602871, 0.010676954, -0.14393383, 0.17847988, 0.008865389, -0.1725383, -0.09610779, -0.07299303, -0.034239206, 0.074058056, -0.19913039, -0.20220406, -0.09904165, -0.076578766, -0.069398046, 0.33933428, -0.045407534, 0.2897711, -0.14395963, 0.006539133, -0.14036646, 0.1162395, -0.06965859, 0.039443154, 0.276661, -0.45402157, -0.16553378, -0.02244941, -0.19449434, -0.12615664, -0.07213336, -0.032101206, 0.2264348, -0.34752584, 0.24813168, -0.12469421, 
0.19268365, -0.07229131, -0.26878825, -0.1646568, -0.0136323245, 0.24497744, -0.33925042, -0.24806993, -0.27512118, -0.09914559, -0.08087616, -0.25906667, 0.43983266, -0.10659018, -0.0744615, 0.006235671, 0.45885658, 0.20347849, 0.16656812, 0.20918673, -0.033345483, 0.044874102, 0.12723547, -0.47557342, 0.22238532, -0.22489853, -0.123347975, 0.0065780478, 0.110617466, -0.008447691, 0.044564214, -0.13392605, -0.09336997, 0.2162811, -0.3759698, -0.06168682, 0.26348773, 0.14962575, -0.2495721, 0.023211598, 0.13297167, 0.38353235, 0.08947278, -0.2225364, 0.105881885, -0.33606172, -0.042888578, -0.2036912, -0.3129537, 0.1476135, -0.07123925, 0.079535104, -0.09969524, -0.29478237, 0.21786276, -0.059731945, -0.092672855, 0.4276989, -3.9967615E-4, -0.12086898, 0.1315131, 0.01156185, 0.019471781, -0.10278702, 0.2727465, 0.20356695, -0.28574204, 0.16212702, -0.14182404, -0.051593896, -0.09539393]}] |[{doc_similarity_rankings, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {pieceId -> -1, lshId -> -1320876223, isWordStart -> true, token -> Fifth document, Florence in Italy, is among the most beautiful cities in Europe., lshNeighbors -> [(1293373212,0.17848861258809434),(-1719102856,0.2761524746260818),(1274183715,0.3148226377437515),(-612640902,0.32308714836804664),(-1548374770,0.32312628638943774),(1634839239,0.3992484826857293),(1510101612,0.3994126224276987)], sentence -> 0}, [-0.017528567, -0.20887347, -0.21519172, -0.03888638, 0.13357492, 0.18307811, 0.25547925, -0.08337439, -0.093116276, -0.16162084, 0.23401257, -0.013713108, -0.12097579, 0.09835637, -0.13103157, 0.49466628, 0.2324918, -0.45448515, 0.019689137, -0.04135379, -0.2731323, 0.04852046, 0.45949715, 0.31224954, 0.09636614, 0.07835619, -0.14175831, -0.027474394, 0.19975916, 0.24123047, 0.29196763, 0.067277946, 0.11105793, 0.25373426, -0.24394739, 0.06752376, -0.3060502, 9.322982E-4, 0.23657267, -0.20978928, -0.07832431, 0.17510965, 0.22120374, -0.17409658, -0.10543208, 
0.41644603, 0.2284994, 0.019994497, -0.1206437, -0.10357072, -0.36405995, 0.34403795, 0.2979404, 0.18125014, 0.018748851, 0.04012803, -0.1733186, 0.24439529, -0.08001437, -0.11709171, -0.12347519, -0.21569543, -0.026534403, -0.050762545, 0.008339827, -0.14291511, 0.09049857, -0.16389105, -0.14423226, 0.061702, -0.095125, 0.12417953, 0.16685584, -0.3054142, -0.28169885, 0.050286457, -0.6093201, -0.122661844, 0.2895588, 0.4279639, -0.13902375, 0.21143883, 0.051323365, 0.23632784, -0.01041111, -0.04957247, -0.055871606, -0.106259845, 0.18611698, 0.28124726, -0.2007103, -0.38195023, 0.06384789, 0.01629718, -0.0945481, -0.010838091, -0.04080201, -0.054464836, -0.17945004, -0.21212989, 0.07180114, -0.27967128, -0.1390803, 0.2592707, -0.03620937, -0.19366772, -0.026854295, 0.29872236, 0.07377065, -0.09859825, -0.16164072, 0.43511766, 0.33360565, 0.00644084, 0.021297934, 0.17641163, 0.109603375, -0.2808542, 0.4100023, -0.30830663, -0.02324736, -0.12787724, 0.13755724, 0.14575775, -0.21397276, 0.27359906, 0.1360874, 0.2805497, 0.18917772, 0.082572766, -0.03715889, 0.14764093, -0.116034165, 0.14106748, 0.21679801, 0.12502475, -0.017175207, -0.351808, -0.21835403, 0.26628754, 0.364595, 0.17638803, -0.041836392, 0.18179081, 0.108427785, 0.22434832, 0.15892959, -0.39827198, 0.044703104, 0.3359178, 0.13377586, 0.15026097, -0.08827668, -0.26545244, -0.24913852, -0.08579704, 0.05721378, -0.31592715, -0.14231163, 0.38532966, 0.036052644, -0.0027677694, -0.15842925, -0.21617033, -0.03264196, -0.13026895, 0.05327642, 0.08736663, -0.09196017, -0.41138726, -0.090421245, -0.5568038, -0.117088675, 0.1952759, -0.2986329, 0.24587378, -0.28948125, 0.083854966, 0.3666834, 0.025861917, -0.0017132398, -0.18599303, -0.03876378, 0.12794247, 0.3335259, 0.22762723, -0.39386296, 0.08550472, 0.14752512, 0.2544776, 0.13983288, -0.030865967, -0.13050406, 0.124644294, -0.19920404, 0.18511117, -0.20913021, 0.18329288, -0.26674783, -0.20118403, 0.2890029, -0.40576503, -0.040320467, 0.09285337, 0.2520434, 
0.0034789476, -0.042712186, -0.07351959, 0.08958212, 0.18637295, 0.15107888, -0.3961306, 0.2681504, -0.021723367, -0.024320226, -0.031113315, 0.19820504, 0.26775804, 0.13101543, -0.39684162, -0.1377801, 0.104406044, 0.28336167, -0.22840387, 0.16368443, -0.29106283, -0.38266584, -0.13229474, 0.20744194, 0.22572315, 0.16587397, -0.27363026, 0.1919627, -0.088201866, -0.42006725, -0.37401968, -0.11527952, 0.23918867, 0.19769923, 0.17820881, 0.2648834, 0.011358881, 0.12344342, 0.17440955, 0.1688173, -0.15462974, 0.18139134, -0.37719452, -0.07280123, -0.26092157, -0.19534999, -0.23901841, 0.3925651, -0.2268674, 0.2323894, 0.38537943, -0.32429454, -0.094378285, 0.16097061, 0.084330045, 0.07674305, -0.100616775, 0.20368801, 0.18317987, -0.10215017, 0.25102583, 0.013015796, 0.2618376, 0.19005089, 0.10934586, 0.121803135, 0.14295664, -0.13193665, 0.0531183, 0.017180707, -0.027863244, -0.26400146, -0.1470274, 0.21853186, -0.02715699, 0.027724234, -0.1564268, -0.102942735, 0.016841251, 0.40123186, -0.37726215, 0.26930076, 0.08578669, 0.1325882, -0.24705328, -0.21208054, 0.068617605, 0.18242663, -0.39056832, -0.01681968, 0.1632192, 0.10309466, 0.21451901, 0.25458825, 0.0111891525, -0.11184347, 0.4824411, -0.15262514, -0.12453984, 0.26980564, -0.24968751, -0.2749819, 0.27711532, -0.024115924, 0.30004704, 0.11883662, 0.026279865, 0.08240666, -0.6025861, 0.07997487, -0.46908012, 0.00318713, 0.017263228, -0.08420625, -0.20542656, 0.15257843, 0.28043687, -0.2576107, -0.029525554, 0.22024228, 0.08719829, -0.10169949, 0.47289637, -0.0070210746, 0.21146531, -0.09518342, 0.25460428, -0.22287658, 0.27640212, -0.2639986, -0.09115377, 0.012192329, 0.11418934, 0.042151, -0.062401574, -0.3224692, 0.21616587, -0.009924554, -0.03401868, -0.06122483, 0.09629407, 0.0010092515, 0.04388869, 0.06199242, 0.31550166, 0.21101683, -0.029377753, -0.36951146, -0.0112636825, -0.102699466, 0.035519373, 0.015170618, -0.014788982, 0.4407807, -0.09595226, 0.018110882, -0.13322428, 0.25438005, 0.20591274, 
0.13240317, 0.1370632, 0.07530696, 0.15535797, -0.054800827, -0.00992456, -0.15799199, -0.22623523, -0.27510443, 0.21811266, -0.22423144, -0.1767162, 0.18349846, 0.22506163, -0.13641146, 0.12888514, 0.32516634, 0.116478354, -0.14479439, 0.2633992, -0.101380765, 0.08838486, 0.31893408, -0.022981562, 0.15893175, 0.51558924, 0.21190989, -0.36944315, -0.048417248, -0.22201133, -0.013516225, 0.26638502, -0.15765886, 0.1851696, 0.38429335, 0.30839187, 0.44408417, 0.016373433, -0.111148976, 0.09977997, 0.21016383, 0.018669449, -0.16498841, -0.15592425, 0.26489583, 0.06269013, -0.15986864, -0.03969585, -0.13937603, 0.031850375, -0.13655634, -0.40000895, 0.05436843, 0.21910602, -0.4996334, 0.086884744, -0.29076114, 0.05204509, -0.22826457, 0.20508406, -0.2507804, -0.10823102, 0.38743898, -0.100984976, 0.058472827, -0.16382112, -0.16156591, 0.023903372, 0.033624973, -0.023788728, -0.018010495, 0.3690815, -0.09954051, 0.052623548, 0.004742802, 0.19814914, -0.06711465, 0.17929058, 0.03160398, -0.1334006, -0.36719155, 0.13863695, -0.22021416, -0.4222436, -0.35124636, 0.36573943, -0.14639676, -0.26974335, -0.21589987, -0.2655588, 0.06684742, 0.17689691, 0.44980702, -0.4014982, -0.09032181, 0.48900092, -0.07084343, -0.1657361, 0.29341906, 0.21496047, -0.3124336, 0.3256341, 0.26760855, -0.04306482, 0.025937881, 0.5136517, 0.13405447, 0.18719737, -0.22120632, 0.4532139, -0.22904672, 0.32023016, -0.17113322, -0.214062, -0.19590136, -0.004523295, 0.3206988, 0.18094444, -0.4241107, -0.09262745, 0.01998503, 0.3636845, -0.36537707, -0.064887315, -0.0055596177, -0.34393403, 0.14510506, 0.06153031, 0.21042548, -0.37673673, 0.00559097, 0.3951263, -0.3184028, 0.13477531, 0.32866392, 0.11474525, 0.36869153, -0.041948557, 0.0048171515, 0.060693625, -0.23275205, 0.0050272522, 0.10768602, 0.5644927, 0.16615626, -0.408257, 0.09772438, 0.2569018, -0.17795925, 0.30351314, -0.08614802, -0.053504214, 0.26501653, -0.03555109, 0.15758383, -0.09940738, -0.24023113, -0.31173047, 0.38147378, -0.21063639, 
-0.09774987, -0.1817993, -0.11774616, -0.14214633, 0.059035324, -0.3725449, 0.3340544, 0.13414147, -0.19714779, -0.09575635, -0.09342445, -0.14242783, -0.22104931, -0.26635736, 0.4439432, -0.15269086, -0.45180112, 0.26066065, 0.040218845, 0.35135338, 0.042087253, 0.11012991, -0.070131294, 0.16262923, 0.093960635, -0.13531551, 0.27414426, 0.06098134, -0.5368306, -0.14256248, -0.237552, 0.084959276, 0.18812361, -0.33930442, 0.038618933, 0.020493202, 0.12378717, 0.025085747, -0.091824085, -0.07121754, 0.39458132, 0.23002088, 0.2822764, 0.09145013, 0.21399665, -0.044916313, -0.33112302, 0.037864167, 0.07553057, -0.20884651, 0.44166437, -0.08572297, -0.39234006, -0.072089285, 0.40756452, 0.09254245, 0.009302463, -0.047768205, 0.22046876, 0.16218755, -0.13332985, 0.18362974, -0.019280719, -0.12338676, -0.114508614, 0.05637966, -0.22382355, 0.056215905, -0.15588409, -0.0018170164, -0.21711601, 0.01625738, -0.19152458, 0.2638185, -0.3450164, 0.08638652, 0.06305374, 0.27371433, -0.35860744, -0.17530364, -0.08452919, 0.16955557, 0.28973067, 0.34323737, 0.01695178, 0.011290298, -0.1810764, -0.22997546, 0.059361335, -0.22219576, 0.1413909, 0.06546778, 0.24282089, -0.32794768, -0.17942922, 0.18658105, -0.097890414, -0.1513686, 0.39975017, 0.25039408, 0.21449175, 0.024416223, 0.27124867, 4.2602097E-4, -0.18044855, -0.13447432, -0.2740434, 0.07203264, -0.07980538, -0.07517155, -0.05881436, -0.12858282, -0.20378308, -0.16090913, 0.15564685, 0.13000391, 0.024031563, -0.057775024, -0.0034629763, -0.2688574, 0.32077903, 0.028848177, 0.047397353, -0.05958363, 0.04970749, -0.13669938, 0.22241849, 0.19625284, 0.08440484, -0.18950449, -0.07067425, -0.2850016, -0.36458367, 0.057933036, 0.13580492, 0.108672015, -0.07561426, -0.2602871, 0.010676954, -0.14393383, 0.17847988, 0.008865389, -0.1725383, -0.09610779, -0.07299303, -0.034239206, 0.074058056, -0.19913039, -0.20220406, -0.09904165, -0.076578766, -0.069398046, 0.33933428, -0.045407534, 0.2897711, -0.14395963, 0.006539133, -0.14036646, 
0.1162395, -0.06965859, 0.039443154, 0.276661, -0.45402157, -0.16553378, -0.02244941, -0.19449434, -0.12615664, -0.07213336, -0.032101206, 0.2264348, -0.34752584, 0.24813168, -0.12469421, 0.19268365, -0.07229131, -0.26878825, -0.1646568, -0.0136323245, 0.24497744, -0.33925042, -0.24806993, -0.27512118, -0.09914559, -0.08087616, -0.25906667, 0.43983266, -0.10659018, -0.0744615, 0.006235671, 0.45885658, 0.20347849, 0.16656812, 0.20918673, -0.033345483, 0.044874102, 0.12723547, -0.47557342, 0.22238532, -0.22489853, -0.123347975, 0.0065780478, 0.110617466, -0.008447691, 0.044564214, -0.13392605, -0.09336997, 0.2162811, -0.3759698, -0.06168682, 0.26348773, 0.14962575, -0.2495721, 0.023211598, 0.13297167, 0.38353235, 0.08947278, -0.2225364, 0.105881885, -0.33606172, -0.042888578, -0.2036912, -0.3129537, 0.1476135, -0.07123925, 0.079535104, -0.09969524, -0.29478237, 0.21786276, -0.059731945, -0.092672855, 0.4276989, -3.9967615E-4, -0.12086898, 0.1315131, 0.01156185, 0.019471781, -0.10278702, 0.2727465, 0.20356695, -0.28574204, 0.16212702, -0.14182404, -0.051593896, -0.09539393]}] |-1320876223 |[(1293373212,0.17848861258809434),(-1719102856,0.2761524746260818),(1274183715,0.3148226377437515),(-612640902,0.32308714836804664),(-1548374770,0.32312628638943774),(1634839239,0.3992484826857293),(1510101612,0.3994126224276987)] |1293373212 |0.17848861258809434 |\n", + "|Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France. 
|[{document, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {sentence -> 0}, []}] |[{document, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {sentence -> 0}, []}] |[{token, 0, 4, Sixth, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 23, Florence, {sentence -> 0}, []}, {token, 25, 26, in, {sentence -> 0}, []}, {token, 28, 32, Italy, {sentence -> 0}, []}, {token, 33, 33, ,, {sentence -> 0}, []}, {token, 35, 36, is, {sentence -> 0}, []}, {token, 38, 38, a, {sentence -> 0}, []}, {token, 40, 43, very, {sentence -> 0}, []}, {token, 45, 53, beautiful, {sentence -> 0}, []}, {token, 55, 58, city, {sentence -> 0}, []}, {token, 60, 61, in, {sentence -> 0}, []}, {token, 63, 68, Europe, {sentence -> 0}, []}, {token, 70, 73, like, {sentence -> 0}, []}, {token, 75, 78, Lyon, {sentence -> 0}, []}, {token, 80, 81, in, {sentence -> 0}, []}, {token, 83, 88, France, {sentence -> 0}, []}, {token, 89, 89, ., {sentence -> 0}, []}]|[{sentence_embeddings, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {sentence -> 0, token -> Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., pieceId -> -1, isWordStart -> true}, [-0.023446266, -0.20165044, -0.21878108, -0.041623246, 0.11712541, 0.17356895, 0.25060305, -0.07937133, -0.10039697, -0.15663107, 0.23232257, -0.01931893, -0.12145367, 0.10105985, -0.13435838, 0.49187884, 0.23358476, -0.45301625, 0.026803127, -0.04935983, -0.26145518, 0.05165556, 0.45950112, 0.306874, 0.0998373, 0.078277044, -0.14320722, -0.01781481, 0.19437787, 0.25057974, 0.2904214, 0.05932349, 0.102811575, 0.2567444, -0.24400775, 0.076585956, -0.3172145, 0.004280416, 0.23430033, -0.19544546, -0.08320522, 0.1736987, 0.20459501, -0.16766919, -0.11249152, 0.4083342, 0.23185952, 0.022361012, -0.12112098, 
-0.10556416, -0.36327595, 0.3500205, 0.3062079, 0.19041865, 0.0067362506, 0.039656542, -0.17330945, 0.25340703, -0.08069101, -0.119429514, -0.13032362, -0.20993283, -0.0167645, -0.050086897, 0.0056060716, -0.15245143, 0.08916381, -0.1656918, -0.14617592, 0.07238135, -0.09877798, 0.1185568, 0.16529602, -0.2908418, -0.2838721, 0.049403507, -0.6004667, -0.12694398, 0.300743, 0.42590234, -0.1330224, 0.21685761, 0.042737015, 0.23039605, -0.005907735, -0.046434082, -0.051799156, -0.10921319, 0.19471209, 0.280458, -0.21468745, -0.389082, 0.06603544, 0.0058575007, -0.092998825, -0.009324989, -0.038295563, -0.05944777, -0.17968194, -0.2114523, 0.07265465, -0.2734195, -0.1358477, 0.25927955, -0.031233825, -0.19235252, -0.029086787, 0.2901101, 0.06723385, -0.102146484, -0.16508116, 0.43281472, 0.32636094, 0.009097091, 0.01839175, 0.18650065, 0.10878458, -0.28116676, 0.40945655, -0.30725458, -0.022352053, -0.11574949, 0.14054005, 0.14310767, -0.21440077, 0.26043293, 0.13321282, 0.27419066, 0.17493708, 0.085719526, -0.04549965, 0.14518753, -0.11650214, 0.1432643, 0.2130986, 0.12127107, -0.013671406, -0.34709513, -0.22083285, 0.25214735, 0.35938737, 0.17598711, -0.04147198, 0.18509206, 0.10416745, 0.22944078, 0.16071385, -0.3968478, 0.04381103, 0.34408224, 0.1280205, 0.14642608, -0.08876359, -0.27770814, -0.25106782, -0.082514875, 0.05523707, -0.31298104, -0.1368086, 0.37814152, 0.032189853, -0.008601095, -0.15483275, -0.20344211, -0.024224738, -0.12747693, 0.049716156, 0.0946889, -0.08236277, -0.40535507, -0.09445458, -0.55537444, -0.117638126, 0.20131218, -0.29938865, 0.23693521, -0.29540664, 0.08935149, 0.37324956, 0.01921228, -0.0033368187, -0.20008223, -0.026781926, 0.11076178, 0.33815718, 0.2366743, -0.3875283, 0.07911281, 0.14018981, 0.2484453, 0.14206566, -0.03912406, -0.124510124, 0.12799053, -0.20131487, 0.18244149, -0.21000002, 0.18165322, -0.2643093, -0.20578533, 0.2962612, -0.4071083, -0.0464723, 0.094553664, 0.25337783, 0.009862542, -0.050271366, -0.07392379, 
0.08882002, 0.17944442, 0.16397862, -0.39179775, 0.26736665, -0.0192729, -0.018442439, -0.047249265, 0.18863918, 0.27062738, 0.12270789, -0.38781646, -0.14433745, 0.10137269, 0.27908146, -0.22974536, 0.15082045, -0.28768408, -0.38333547, -0.14000101, 0.21542954, 0.22084652, 0.16104579, -0.27537724, 0.18188491, -0.085308656, -0.42232037, -0.3777818, -0.11865082, 0.24589635, 0.19026503, 0.16947359, 0.26346776, 0.01202057, 0.121433474, 0.17466974, 0.17356977, -0.14497308, 0.18488164, -0.37579408, -0.06502906, -0.26128498, -0.1877009, -0.23865594, 0.38699523, -0.23804992, 0.23802643, 0.37530044, -0.31416655, -0.090628475, 0.15442699, 0.083323605, 0.08340364, -0.10496614, 0.19882442, 0.17857088, -0.09811273, 0.2436273, 0.019029541, 0.26368392, 0.18991555, 0.10516484, 0.123206705, 0.1514704, -0.13549489, 0.059166815, 0.020459305, -0.018234596, -0.26658913, -0.15862788, 0.22305442, -0.029723663, 0.038318057, -0.16501893, -0.09223226, 0.0057948544, 0.39988422, -0.38048235, 0.25966093, 0.09570852, 0.14424734, -0.23434037, -0.21592666, 0.056217305, 0.17857862, -0.39444405, -0.010454616, 0.1634386, 0.10004582, 0.20535669, 0.2486722, -0.0013606278, -0.10910795, 0.47047195, -0.14693314, -0.13118234, 0.2695527, -0.2521114, -0.26835597, 0.27185902, -0.03241541, 0.29947615, 0.12267158, 0.03249368, 0.08601499, -0.5978525, 0.07179763, -0.46106672, -0.0033933884, 0.010675606, -0.090698555, -0.20369855, 0.15400767, 0.2822103, -0.26237437, -0.023959426, 0.22057839, 0.088779986, -0.10327962, 0.48200354, 0.009818012, 0.21735828, -0.09726218, 0.2565691, -0.2176083, 0.2767009, -0.26876214, -0.1043232, 0.019080801, 0.109899275, 0.046640366, -0.05314609, -0.3256458, 0.22454666, -0.0037985877, -0.03338025, -0.05607982, 0.10121204, 0.014488875, 0.044235453, 0.05585344, 0.3089643, 0.19958188, -0.034279544, -0.37689233, -0.02067298, -0.108184114, 0.04761538, 0.014341503, -0.011753305, 0.4448616, -0.09475451, 0.011186096, -0.1306354, 0.24714595, 0.21111813, 0.12608668, 0.13853198, 0.07227103, 
0.14366136, -0.04700943, -0.0034062795, -0.16412996, -0.21816893, -0.27851155, 0.21785723, -0.2318628, -0.17842, 0.17996907, 0.23085351, -0.13216704, 0.135295, 0.32747313, 0.105372675, -0.14992404, 0.2551415, -0.09932614, 0.08560063, 0.32468835, -0.027163632, 0.1535825, 0.5111959, 0.20384456, -0.37404054, -0.041242123, -0.22566558, -0.0072444435, 0.26074654, -0.1534884, 0.18430914, 0.37578115, 0.29331762, 0.43657038, 0.018274307, -0.11042639, 0.09899991, 0.20794784, 0.017362932, -0.16367818, -0.16474143, 0.25822097, 0.061900627, -0.15261118, -0.04392805, -0.13885197, 0.028419657, -0.12466939, -0.38992488, 0.04835484, 0.20757526, -0.4971978, 0.09418101, -0.2879418, 0.055356182, -0.2286889, 0.2083682, -0.25274304, -0.10421833, 0.3857167, -0.09897424, 0.053298876, -0.168115, -0.16063228, 0.014610065, 0.028016137, -0.01940221, -0.018098384, 0.3685852, -0.099855445, 0.054043293, 0.0076535707, 0.1977779, -0.060267262, 0.17461002, 0.025309464, -0.14138436, -0.36965942, 0.14341168, -0.22054991, -0.41669595, -0.350635, 0.35354385, -0.14340346, -0.26205418, -0.21479966, -0.25892776, 0.06292348, 0.17838463, 0.44896346, -0.4010159, -0.082289286, 0.4955979, -0.06996464, -0.16989557, 0.2896182, 0.20098865, -0.31606963, 0.31730008, 0.27175304, -0.039616365, 0.03201407, 0.5099212, 0.13284244, 0.19106193, -0.2276362, 0.44946808, -0.24190746, 0.31880343, -0.1655242, -0.219303, -0.2011259, 0.0012589244, 0.31311673, 0.18145496, -0.41264796, -0.09883746, 0.02824915, 0.35613367, -0.35888487, -0.06726893, -0.0065832324, -0.34199518, 0.13907939, 0.061010145, 0.20799887, -0.3716825, -0.004743823, 0.39617428, -0.31897175, 0.12047265, 0.32285565, 0.10910455, 0.36702907, -0.027534468, -0.003306972, 0.060804185, -0.23777582, -0.0062758606, 0.11205024, 0.55437744, 0.17333084, -0.40158457, 0.08618948, 0.2623983, -0.16576529, 0.30049047, -0.085133836, -0.06609498, 0.27484143, -0.029358897, 0.15490839, -0.09690879, -0.23637532, -0.3109882, 0.38827002, -0.21937749, -0.10091087, -0.18243739, 
-0.1090896, -0.13279268, 0.04819067, -0.37471464, 0.33432263, 0.13068555, -0.18624684, -0.092815556, -0.09915114, -0.1402976, -0.21539931, -0.26148954, 0.43860984, -0.14875218, -0.45604524, 0.26144618, 0.046405524, 0.3500595, 0.036529973, 0.10993026, -0.06356534, 0.15648735, 0.10038366, -0.12816685, 0.26705813, 0.058001406, -0.5319837, -0.13663763, -0.23581989, 0.08892639, 0.1951976, -0.332798, 0.041280366, 0.014529156, 0.1333642, 0.025140464, -0.08970695, -0.07874118, 0.3977039, 0.23984994, 0.28039792, 0.08364948, 0.22422723, -0.038728643, -0.33996713, 0.034016807, 0.0771055, -0.20905045, 0.43872592, -0.08042428, -0.38293678, -0.06786471, 0.4005434, 0.099631764, 0.0023761084, -0.034679968, 0.22867568, 0.15132599, -0.13165158, 0.17360209, -0.023124326, -0.11094245, -0.123592794, 0.060751006, -0.2161707, 0.06489437, -0.16699107, -0.0067672064, -0.21374385, 0.006120859, -0.18749413, 0.25970832, -0.3388818, 0.09158317, 0.06126679, 0.28619418, -0.35591418, -0.1678413, -0.08147895, 0.17794101, 0.27810583, 0.3330661, 0.018313073, 0.018819055, -0.1754481, -0.22454868, 0.05389455, -0.22023496, 0.15060861, 0.07419974, 0.24520048, -0.32962036, -0.17017749, 0.18325652, -0.08431439, -0.14495264, 0.40784538, 0.25344115, 0.22945794, 0.026034003, 0.25645342, 0.009030903, -0.17643104, -0.12609684, -0.26928347, 0.07151792, -0.077939935, -0.07463445, -0.06530657, -0.12808533, -0.20940332, -0.14916818, 0.15249917, 0.13223007, 0.029716063, -0.060736544, -0.008027437, -0.27261928, 0.32002795, 0.022525271, 0.05300195, -0.049036894, 0.039181605, -0.14347084, 0.22075538, 0.2040532, 0.07992179, -0.18918262, -0.062667236, -0.28125694, -0.36992085, 0.06321907, 0.14176568, 0.107254215, -0.076132506, -0.26557773, 0.0027493138, -0.14170834, 0.18940176, 0.02132048, -0.16085538, -0.09090096, -0.069289364, -0.027277801, 0.076331265, -0.19630285, -0.1918632, -0.10226693, -0.08679162, -0.071425855, 0.338751, -0.055547565, 0.28674108, -0.15268509, 6.1385415E-4, -0.15316467, 0.10762864, -0.06699272, 
0.044879604, 0.277528, -0.4518037, -0.16449177, -0.025337236, -0.20989054, -0.121194035, -0.06343536, -0.0277429, 0.22775164, -0.3471569, 0.2355159, -0.12448876, 0.19339418, -0.07414347, -0.26808125, -0.1616031, 1.1864491E-4, 0.2448654, -0.32890862, -0.2461858, -0.26607546, -0.10263033, -0.08375746, -0.25876787, 0.43655652, -0.11765381, -0.07028996, 0.0068271267, 0.45646456, 0.19998655, 0.162282, 0.21107695, -0.033425715, 0.03880097, 0.13234338, -0.46868268, 0.21881902, -0.22336914, -0.116309024, 0.005304634, 0.09728798, -0.015745807, 0.040650856, -0.13960604, -0.08813464, 0.22259608, -0.38552696, -0.060650844, 0.25103465, 0.14920752, -0.25241658, 0.0152171375, 0.12354388, 0.386575, 0.08479981, -0.21507308, 0.09969368, -0.32417792, -0.03249409, -0.18413575, -0.31314823, 0.13829513, -0.06986319, 0.087390184, -0.09777708, -0.30072027, 0.21826608, -0.05674966, -0.09027065, 0.42604133, -8.4666326E-4, -0.11587073, 0.14037885, 0.016692076, 0.020549806, -0.096504934, 0.26699734, 0.2102296, -0.28719124, 0.16563456, -0.13212183, -0.047723904, -0.09555403]}] |[{doc_similarity_rankings, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {pieceId -> -1, lshId -> 1293373212, isWordStart -> true, token -> Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., lshNeighbors -> [(-1320876223,0.17848861258809434),(-1719102856,0.2876650539432857),(-1548374770,0.3225589844685982),(1274183715,0.34962777859773414),(-612640902,0.3656377678477694),(1510101612,0.3979194244143298),(1634839239,0.39846872824443047)], sentence -> 0}, [-0.023446266, -0.20165044, -0.21878108, -0.041623246, 0.11712541, 0.17356895, 0.25060305, -0.07937133, -0.10039697, -0.15663107, 0.23232257, -0.01931893, -0.12145367, 0.10105985, -0.13435838, 0.49187884, 0.23358476, -0.45301625, 0.026803127, -0.04935983, -0.26145518, 0.05165556, 0.45950112, 0.306874, 0.0998373, 0.078277044, -0.14320722, -0.01781481, 0.19437787, 0.25057974, 
0.2904214, 0.05932349, 0.102811575, 0.2567444, -0.24400775, 0.076585956, -0.3172145, 0.004280416, 0.23430033, -0.19544546, -0.08320522, 0.1736987, 0.20459501, -0.16766919, -0.11249152, 0.4083342, 0.23185952, 0.022361012, -0.12112098, -0.10556416, -0.36327595, 0.3500205, 0.3062079, 0.19041865, 0.0067362506, 0.039656542, -0.17330945, 0.25340703, -0.08069101, -0.119429514, -0.13032362, -0.20993283, -0.0167645, -0.050086897, 0.0056060716, -0.15245143, 0.08916381, -0.1656918, -0.14617592, 0.07238135, -0.09877798, 0.1185568, 0.16529602, -0.2908418, -0.2838721, 0.049403507, -0.6004667, -0.12694398, 0.300743, 0.42590234, -0.1330224, 0.21685761, 0.042737015, 0.23039605, -0.005907735, -0.046434082, -0.051799156, -0.10921319, 0.19471209, 0.280458, -0.21468745, -0.389082, 0.06603544, 0.0058575007, -0.092998825, -0.009324989, -0.038295563, -0.05944777, -0.17968194, -0.2114523, 0.07265465, -0.2734195, -0.1358477, 0.25927955, -0.031233825, -0.19235252, -0.029086787, 0.2901101, 0.06723385, -0.102146484, -0.16508116, 0.43281472, 0.32636094, 0.009097091, 0.01839175, 0.18650065, 0.10878458, -0.28116676, 0.40945655, -0.30725458, -0.022352053, -0.11574949, 0.14054005, 0.14310767, -0.21440077, 0.26043293, 0.13321282, 0.27419066, 0.17493708, 0.085719526, -0.04549965, 0.14518753, -0.11650214, 0.1432643, 0.2130986, 0.12127107, -0.013671406, -0.34709513, -0.22083285, 0.25214735, 0.35938737, 0.17598711, -0.04147198, 0.18509206, 0.10416745, 0.22944078, 0.16071385, -0.3968478, 0.04381103, 0.34408224, 0.1280205, 0.14642608, -0.08876359, -0.27770814, -0.25106782, -0.082514875, 0.05523707, -0.31298104, -0.1368086, 0.37814152, 0.032189853, -0.008601095, -0.15483275, -0.20344211, -0.024224738, -0.12747693, 0.049716156, 0.0946889, -0.08236277, -0.40535507, -0.09445458, -0.55537444, -0.117638126, 0.20131218, -0.29938865, 0.23693521, -0.29540664, 0.08935149, 0.37324956, 0.01921228, -0.0033368187, -0.20008223, -0.026781926, 0.11076178, 0.33815718, 0.2366743, -0.3875283, 0.07911281, 0.14018981, 
0.2484453, 0.14206566, -0.03912406, -0.124510124, 0.12799053, -0.20131487, 0.18244149, -0.21000002, 0.18165322, -0.2643093, -0.20578533, 0.2962612, -0.4071083, -0.0464723, 0.094553664, 0.25337783, 0.009862542, -0.050271366, -0.07392379, 0.08882002, 0.17944442, 0.16397862, -0.39179775, 0.26736665, -0.0192729, -0.018442439, -0.047249265, 0.18863918, 0.27062738, 0.12270789, -0.38781646, -0.14433745, 0.10137269, 0.27908146, -0.22974536, 0.15082045, -0.28768408, -0.38333547, -0.14000101, 0.21542954, 0.22084652, 0.16104579, -0.27537724, 0.18188491, -0.085308656, -0.42232037, -0.3777818, -0.11865082, 0.24589635, 0.19026503, 0.16947359, 0.26346776, 0.01202057, 0.121433474, 0.17466974, 0.17356977, -0.14497308, 0.18488164, -0.37579408, -0.06502906, -0.26128498, -0.1877009, -0.23865594, 0.38699523, -0.23804992, 0.23802643, 0.37530044, -0.31416655, -0.090628475, 0.15442699, 0.083323605, 0.08340364, -0.10496614, 0.19882442, 0.17857088, -0.09811273, 0.2436273, 0.019029541, 0.26368392, 0.18991555, 0.10516484, 0.123206705, 0.1514704, -0.13549489, 0.059166815, 0.020459305, -0.018234596, -0.26658913, -0.15862788, 0.22305442, -0.029723663, 0.038318057, -0.16501893, -0.09223226, 0.0057948544, 0.39988422, -0.38048235, 0.25966093, 0.09570852, 0.14424734, -0.23434037, -0.21592666, 0.056217305, 0.17857862, -0.39444405, -0.010454616, 0.1634386, 0.10004582, 0.20535669, 0.2486722, -0.0013606278, -0.10910795, 0.47047195, -0.14693314, -0.13118234, 0.2695527, -0.2521114, -0.26835597, 0.27185902, -0.03241541, 0.29947615, 0.12267158, 0.03249368, 0.08601499, -0.5978525, 0.07179763, -0.46106672, -0.0033933884, 0.010675606, -0.090698555, -0.20369855, 0.15400767, 0.2822103, -0.26237437, -0.023959426, 0.22057839, 0.088779986, -0.10327962, 0.48200354, 0.009818012, 0.21735828, -0.09726218, 0.2565691, -0.2176083, 0.2767009, -0.26876214, -0.1043232, 0.019080801, 0.109899275, 0.046640366, -0.05314609, -0.3256458, 0.22454666, -0.0037985877, -0.03338025, -0.05607982, 0.10121204, 0.014488875, 0.044235453, 
0.05585344, 0.3089643, 0.19958188, -0.034279544, -0.37689233, -0.02067298, -0.108184114, 0.04761538, 0.014341503, -0.011753305, 0.4448616, -0.09475451, 0.011186096, -0.1306354, 0.24714595, 0.21111813, 0.12608668, 0.13853198, 0.07227103, 0.14366136, -0.04700943, -0.0034062795, -0.16412996, -0.21816893, -0.27851155, 0.21785723, -0.2318628, -0.17842, 0.17996907, 0.23085351, -0.13216704, 0.135295, 0.32747313, 0.105372675, -0.14992404, 0.2551415, -0.09932614, 0.08560063, 0.32468835, -0.027163632, 0.1535825, 0.5111959, 0.20384456, -0.37404054, -0.041242123, -0.22566558, -0.0072444435, 0.26074654, -0.1534884, 0.18430914, 0.37578115, 0.29331762, 0.43657038, 0.018274307, -0.11042639, 0.09899991, 0.20794784, 0.017362932, -0.16367818, -0.16474143, 0.25822097, 0.061900627, -0.15261118, -0.04392805, -0.13885197, 0.028419657, -0.12466939, -0.38992488, 0.04835484, 0.20757526, -0.4971978, 0.09418101, -0.2879418, 0.055356182, -0.2286889, 0.2083682, -0.25274304, -0.10421833, 0.3857167, -0.09897424, 0.053298876, -0.168115, -0.16063228, 0.014610065, 0.028016137, -0.01940221, -0.018098384, 0.3685852, -0.099855445, 0.054043293, 0.0076535707, 0.1977779, -0.060267262, 0.17461002, 0.025309464, -0.14138436, -0.36965942, 0.14341168, -0.22054991, -0.41669595, -0.350635, 0.35354385, -0.14340346, -0.26205418, -0.21479966, -0.25892776, 0.06292348, 0.17838463, 0.44896346, -0.4010159, -0.082289286, 0.4955979, -0.06996464, -0.16989557, 0.2896182, 0.20098865, -0.31606963, 0.31730008, 0.27175304, -0.039616365, 0.03201407, 0.5099212, 0.13284244, 0.19106193, -0.2276362, 0.44946808, -0.24190746, 0.31880343, -0.1655242, -0.219303, -0.2011259, 0.0012589244, 0.31311673, 0.18145496, -0.41264796, -0.09883746, 0.02824915, 0.35613367, -0.35888487, -0.06726893, -0.0065832324, -0.34199518, 0.13907939, 0.061010145, 0.20799887, -0.3716825, -0.004743823, 0.39617428, -0.31897175, 0.12047265, 0.32285565, 0.10910455, 0.36702907, -0.027534468, -0.003306972, 0.060804185, -0.23777582, -0.0062758606, 0.11205024, 
0.55437744, 0.17333084, -0.40158457, 0.08618948, 0.2623983, -0.16576529, 0.30049047, -0.085133836, -0.06609498, 0.27484143, -0.029358897, 0.15490839, -0.09690879, -0.23637532, -0.3109882, 0.38827002, -0.21937749, -0.10091087, -0.18243739, -0.1090896, -0.13279268, 0.04819067, -0.37471464, 0.33432263, 0.13068555, -0.18624684, -0.092815556, -0.09915114, -0.1402976, -0.21539931, -0.26148954, 0.43860984, -0.14875218, -0.45604524, 0.26144618, 0.046405524, 0.3500595, 0.036529973, 0.10993026, -0.06356534, 0.15648735, 0.10038366, -0.12816685, 0.26705813, 0.058001406, -0.5319837, -0.13663763, -0.23581989, 0.08892639, 0.1951976, -0.332798, 0.041280366, 0.014529156, 0.1333642, 0.025140464, -0.08970695, -0.07874118, 0.3977039, 0.23984994, 0.28039792, 0.08364948, 0.22422723, -0.038728643, -0.33996713, 0.034016807, 0.0771055, -0.20905045, 0.43872592, -0.08042428, -0.38293678, -0.06786471, 0.4005434, 0.099631764, 0.0023761084, -0.034679968, 0.22867568, 0.15132599, -0.13165158, 0.17360209, -0.023124326, -0.11094245, -0.123592794, 0.060751006, -0.2161707, 0.06489437, -0.16699107, -0.0067672064, -0.21374385, 0.006120859, -0.18749413, 0.25970832, -0.3388818, 0.09158317, 0.06126679, 0.28619418, -0.35591418, -0.1678413, -0.08147895, 0.17794101, 0.27810583, 0.3330661, 0.018313073, 0.018819055, -0.1754481, -0.22454868, 0.05389455, -0.22023496, 0.15060861, 0.07419974, 0.24520048, -0.32962036, -0.17017749, 0.18325652, -0.08431439, -0.14495264, 0.40784538, 0.25344115, 0.22945794, 0.026034003, 0.25645342, 0.009030903, -0.17643104, -0.12609684, -0.26928347, 0.07151792, -0.077939935, -0.07463445, -0.06530657, -0.12808533, -0.20940332, -0.14916818, 0.15249917, 0.13223007, 0.029716063, -0.060736544, -0.008027437, -0.27261928, 0.32002795, 0.022525271, 0.05300195, -0.049036894, 0.039181605, -0.14347084, 0.22075538, 0.2040532, 0.07992179, -0.18918262, -0.062667236, -0.28125694, -0.36992085, 0.06321907, 0.14176568, 0.107254215, -0.076132506, -0.26557773, 0.0027493138, -0.14170834, 0.18940176, 
0.02132048, -0.16085538, -0.09090096, -0.069289364, -0.027277801, 0.076331265, -0.19630285, -0.1918632, -0.10226693, -0.08679162, -0.071425855, 0.338751, -0.055547565, 0.28674108, -0.15268509, 6.1385415E-4, -0.15316467, 0.10762864, -0.06699272, 0.044879604, 0.277528, -0.4518037, -0.16449177, -0.025337236, -0.20989054, -0.121194035, -0.06343536, -0.0277429, 0.22775164, -0.3471569, 0.2355159, -0.12448876, 0.19339418, -0.07414347, -0.26808125, -0.1616031, 1.1864491E-4, 0.2448654, -0.32890862, -0.2461858, -0.26607546, -0.10263033, -0.08375746, -0.25876787, 0.43655652, -0.11765381, -0.07028996, 0.0068271267, 0.45646456, 0.19998655, 0.162282, 0.21107695, -0.033425715, 0.03880097, 0.13234338, -0.46868268, 0.21881902, -0.22336914, -0.116309024, 0.005304634, 0.09728798, -0.015745807, 0.040650856, -0.13960604, -0.08813464, 0.22259608, -0.38552696, -0.060650844, 0.25103465, 0.14920752, -0.25241658, 0.0152171375, 0.12354388, 0.386575, 0.08479981, -0.21507308, 0.09969368, -0.32417792, -0.03249409, -0.18413575, -0.31314823, 0.13829513, -0.06986319, 0.087390184, -0.09777708, -0.30072027, 0.21826608, -0.05674966, -0.09027065, 0.42604133, -8.4666326E-4, -0.11587073, 0.14037885, 0.016692076, 0.020549806, -0.096504934, 0.26699734, 0.2102296, -0.28719124, 0.16563456, -0.13212183, -0.047723904, -0.09555403]}] |1293373212 |[(-1320876223,0.17848861258809434),(-1719102856,0.2876650539432857),(-1548374770,0.3225589844685982),(1274183715,0.34962777859773414),(-612640902,0.3656377678477694),(1510101612,0.3979194244143298),(1634839239,0.39846872824443047)] |-1320876223 |0.17848861258809434 |\n", + "|Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.|[{document, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {sentence -> 0}, []}]|[{document, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {sentence -> 0}, []}] 
|[{token, 0, 6, Seventh, {sentence -> 0}, []}, {token, 8, 15, document, {sentence -> 0}, []}, {token, 16, 16, ,, {sentence -> 0}, []}, {token, 18, 20, the, {sentence -> 0}, []}, {token, 22, 27, French, {sentence -> 0}, []}, {token, 29, 35, Riviera, {sentence -> 0}, []}, {token, 37, 38, is, {sentence -> 0}, []}, {token, 40, 42, the, {sentence -> 0}, []}, {token, 44, 56, Mediterranean, {sentence -> 0}, []}, {token, 58, 66, coastline, {sentence -> 0}, []}, {token, 68, 69, of, {sentence -> 0}, []}, {token, 71, 73, the, {sentence -> 0}, []}, {token, 75, 83, southeast, {sentence -> 0}, []}, {token, 85, 90, corner, {sentence -> 0}, []}, {token, 92, 93, of, {sentence -> 0}, []}, {token, 95, 100, France, {sentence -> 0}, []}, {token, 101, 101, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {sentence -> 0, token -> Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., pieceId -> -1, isWordStart -> true}, [-0.008019538, -0.20794445, -0.22693227, -0.066497125, 0.13135312, 0.18650925, 0.26249522, -0.078561835, -0.108032554, -0.16549937, 0.23456018, -0.0029051038, -0.09654328, 0.1081486, -0.12367482, 0.48534602, 0.21623878, -0.45364743, 0.019192278, -0.02995625, -0.2504148, 0.050217465, 0.4518648, 0.32667267, 0.10702716, 0.0853897, -0.14142184, -0.03167035, 0.19115756, 0.24786223, 0.2914291, 0.05494155, 0.10942929, 0.24128707, -0.25258842, 0.06971938, -0.30623087, 0.009310224, 0.24930832, -0.21145473, -0.071628265, 0.16028066, 0.2130934, -0.15951225, -0.12180848, 0.41019738, 0.2232628, 0.035024345, -0.1233715, -0.09511672, -0.37004888, 0.3324171, 0.28777122, 0.19508287, -0.0010060468, 0.033361237, -0.15824829, 0.23507485, -0.07277971, -0.10862046, -0.1266775, -0.20873529, -0.016099958, -0.054443464, 0.03418388, -0.15287445, 0.08516955, -0.16009997, -0.13519952, 0.06594492, -0.09619981, 0.13498701, 0.17068765, 
-0.27748087, -0.27755773, 0.050838053, -0.60263795, -0.110697724, 0.29348016, 0.43039024, -0.10951068, 0.18981335, 0.045793816, 0.20932128, -0.013209117, -0.045443077, -0.04795611, -0.09545004, 0.18174982, 0.27740657, -0.20092762, -0.3743948, 0.070061244, 0.021248285, -0.10003458, -0.012923969, -0.015641354, -0.069142535, -0.18447016, -0.20632346, 0.061669473, -0.26073086, -0.13162859, 0.2652085, -0.020487502, -0.20239983, -0.019889005, 0.28115276, 0.06268703, -0.10772411, -0.17282209, 0.42130953, 0.3386013, 0.01063591, 0.012493721, 0.18252686, 0.11687154, -0.289261, 0.41447076, -0.3171783, -0.007732441, -0.12554668, 0.12227292, 0.14982867, -0.1994947, 0.2653794, 0.13194634, 0.27858624, 0.18165465, 0.09452338, -0.036926605, 0.14094561, -0.10714303, 0.14023595, 0.21794924, 0.11931853, 0.0083964225, -0.34428638, -0.2060787, 0.2581659, 0.34725285, 0.17249854, -0.035370246, 0.18123226, 0.115042284, 0.21278691, 0.16332792, -0.39327794, 0.043199714, 0.3466151, 0.10552055, 0.1811821, -0.094810925, -0.25144812, -0.2369235, -0.098048225, 0.053571597, -0.3044636, -0.117047854, 0.37140828, 0.03258837, 0.010649239, -0.1384685, -0.2107958, -0.04297339, -0.11824401, 0.022123057, 0.09906902, -0.08942813, -0.40615627, -0.09174467, -0.54138595, -0.12516883, 0.20398387, -0.3002764, 0.23201002, -0.2881544, 0.09412716, 0.38156128, 0.027156925, 0.008762595, -0.18784012, -0.022351125, 0.12036052, 0.3264586, 0.24900058, -0.39910597, 0.079319164, 0.15601031, 0.23772217, 0.13825494, -0.04249709, -0.11666459, 0.12905037, -0.19663046, 0.1725472, -0.22609569, 0.18567988, -0.2612912, -0.21441153, 0.29490396, -0.4131635, -0.044802688, 0.07255378, 0.25608072, 0.004686255, -0.05354772, -0.096422195, 0.10385672, 0.16806516, 0.16490765, -0.40542477, 0.27064338, -0.036462914, -0.031986397, -0.029910486, 0.1873078, 0.25759354, 0.10744408, -0.38749996, -0.13174862, 0.09937129, 0.26613194, -0.22159924, 0.18355416, -0.2635393, -0.37645352, -0.12610617, 0.20294958, 0.21151984, 0.16929722, -0.26906973, 
0.17143078, -0.10153495, -0.42571825, -0.36661187, -0.102374844, 0.22459105, 0.19933926, 0.19093192, 0.24899375, 0.014875878, 0.12345995, 0.17558807, 0.16242947, -0.14835785, 0.172079, -0.36210755, -0.040536694, -0.2759419, -0.20727508, -0.23599353, 0.38549605, -0.20905586, 0.23385757, 0.40176284, -0.31578505, -0.086200155, 0.14503288, 0.087825194, 0.05920683, -0.11740758, 0.21260233, 0.19665754, -0.10235794, 0.258057, -0.014308026, 0.25915918, 0.16674384, 0.10508859, 0.11316951, 0.12858894, -0.15207034, 0.044372506, 0.010741287, -0.026460659, -0.23678094, -0.13959345, 0.22005278, -0.046174966, 0.029175762, -0.15699074, -0.07535748, 0.009295726, 0.39943105, -0.3637203, 0.26161516, 0.08734801, 0.14933671, -0.2341384, -0.21599188, 0.072215594, 0.18299405, -0.39975247, 0.0035565842, 0.14676017, 0.10756498, 0.22721377, 0.25173554, -0.006123567, -0.094908014, 0.4766384, -0.14474754, -0.13256605, 0.2748037, -0.24513212, -0.2763986, 0.2767801, -0.02837765, 0.3070326, 0.14310399, 0.02803333, 0.07966456, -0.59598786, 0.0634747, -0.46667746, 2.0662788E-4, 0.027827349, -0.09846748, -0.2049167, 0.14928561, 0.28899723, -0.24984509, -0.027508143, 0.21127011, 0.07993328, -0.10535655, 0.49076062, -0.0016710946, 0.22241512, -0.0761468, 0.26699233, -0.2132116, 0.26694208, -0.2699361, -0.102920294, 0.023504116, 0.08591295, 0.047616553, -0.03864741, -0.32397145, 0.22423643, 3.750706E-4, -0.052303057, -0.051092364, 0.10266755, 0.007874346, 0.056537174, 0.052040357, 0.3099191, 0.2147011, -0.041306116, -0.37230852, -0.02358203, -0.093814276, 0.06317558, 0.021807505, -0.0052324715, 0.44351, -0.084359095, 0.02457905, -0.120074786, 0.25709224, 0.20523037, 0.13982376, 0.12155931, 0.085087486, 0.15450434, -0.033903852, -0.008247593, -0.14950626, -0.22513278, -0.28045303, 0.20739494, -0.22394833, -0.16329561, 0.18090749, 0.21472976, -0.119760044, 0.122836865, 0.31932604, 0.10612227, -0.17157494, 0.2600004, -0.09567128, 0.08662764, 0.31872585, -0.032042537, 0.16470939, 0.50351644, 0.21122353, 
-0.36921608, -0.033876106, -0.22228853, -0.01841526, 0.26909, -0.15758824, 0.18417774, 0.3548506, 0.31725127, 0.43062854, 0.01227282, -0.12594888, 0.08146904, 0.21303466, 0.022855874, -0.1579743, -0.15339917, 0.2802583, 0.056297414, -0.14544499, -0.03212101, -0.14632796, 0.014579217, -0.13752632, -0.41132227, 0.04383321, 0.21508393, -0.4907415, 0.094107084, -0.31429338, 0.046641294, -0.2345635, 0.21059625, -0.24306759, -0.1063646, 0.3705863, -0.09811044, 0.0605867, -0.18104103, -0.14497119, 0.029493399, 0.03492019, -0.029783124, -0.013816307, 0.35751185, -0.10965113, 0.041228097, 0.013336186, 0.20357218, -0.069435954, 0.18225056, 0.027941927, -0.15454903, -0.3813045, 0.15006964, -0.19400018, -0.43550804, -0.36777005, 0.36024705, -0.1553815, -0.25671393, -0.2242871, -0.25538933, 0.05633095, 0.17045896, 0.46840438, -0.40827343, -0.06883438, 0.49420673, -0.054230284, -0.1784511, 0.3003717, 0.20356534, -0.30255544, 0.32934627, 0.28107437, -0.043982994, 0.030254403, 0.50613445, 0.106514424, 0.16891049, -0.21837108, 0.43107754, -0.22679833, 0.3222671, -0.14380512, -0.22103326, -0.20031895, -0.0053463904, 0.31086656, 0.18283913, -0.4164982, -0.10497948, 0.05488763, 0.3499168, -0.38303393, -0.0706, 0.02012401, -0.34419113, 0.14620271, 0.06066406, 0.23910847, -0.36826327, -0.0031818626, 0.39364952, -0.31577078, 0.13167827, 0.32565027, 0.09713475, 0.35657045, -0.031938132, 0.0063300836, 0.058500852, -0.24127181, -0.009309541, 0.1374363, 0.55266875, 0.15968034, -0.386893, 0.09495158, 0.24035685, -0.16255692, 0.29488644, -0.079560675, -0.06405346, 0.25573367, -0.031631544, 0.15875565, -0.089678556, -0.22789505, -0.30908328, 0.37180826, -0.20641276, -0.10318238, -0.16293754, -0.08793884, -0.13343672, 0.06730986, -0.36669317, 0.3368156, 0.12109322, -0.19045906, -0.08927799, -0.06881458, -0.15314971, -0.2044369, -0.2583901, 0.4240869, -0.16738205, -0.45303494, 0.2660113, 0.036534272, 0.34492457, 0.03064578, 0.1161642, -0.07246866, 0.14535797, 0.094102405, -0.13236499, 0.281681, 
0.06910245, -0.5319742, -0.14167535, -0.2213673, 0.10062441, 0.1823384, -0.32449323, 0.045420066, 0.020613493, 0.14170578, 0.054557852, -0.081435464, -0.08237418, 0.39456445, 0.22010934, 0.27721396, 0.079628035, 0.21498804, -0.030950392, -0.3197281, 0.047628313, 0.06835473, -0.19502498, 0.4497397, -0.07986492, -0.37665737, -0.06270121, 0.40801352, 0.087262504, 0.013305801, -0.051705398, 0.20942184, 0.1442438, -0.12746692, 0.1790049, -0.019305615, -0.118932344, -0.10855935, 0.07830948, -0.213931, 0.03763846, -0.16343854, -0.022269689, -0.22339572, 0.03815981, -0.21411316, 0.26194704, -0.32970122, 0.09410265, 0.07144776, 0.28778374, -0.3441213, -0.16373956, -0.07426902, 0.160929, 0.27774033, 0.3449314, 0.032607112, 0.01710335, -0.1717972, -0.2522701, 0.056452833, -0.20888108, 0.15573634, 0.07741951, 0.21761471, -0.3139925, -0.18512204, 0.19587885, -0.093458064, -0.15175745, 0.39264217, 0.24714977, 0.23077905, 0.011358897, 0.2595526, 0.011583311, -0.18557008, -0.14353761, -0.25729296, 0.07521876, -0.096184544, -0.040979125, -0.05232608, -0.11970768, -0.21794261, -0.15748894, 0.14131306, 0.12565142, 0.013618528, -0.042706285, -0.009846034, -0.26986337, 0.32127848, 0.012022808, 0.053931307, -0.053089615, 0.04204643, -0.14650013, 0.2337747, 0.22185645, 0.084541485, -0.18815027, -0.05060835, -0.26873425, -0.34384862, 0.04447537, 0.14540635, 0.10909279, -0.07547726, -0.24596812, 0.01701418, -0.12477274, 0.17758216, -0.003989822, -0.15708753, -0.10013234, -0.060486514, -0.023525922, 0.07570741, -0.2043602, -0.20318846, -0.09272064, -0.08396323, -0.06631228, 0.34927878, -0.042691473, 0.30072594, -0.14322798, -0.0010577674, -0.16078945, 0.10943518, -0.065818414, 0.048736267, 0.27629223, -0.46065903, -0.15741968, -0.00574388, -0.20248723, -0.13032655, -0.075273484, -0.019586748, 0.22265866, -0.3558014, 0.2260905, -0.096864544, 0.18844095, -0.07203457, -0.2636315, -0.14861545, 0.0066989767, 0.24385743, -0.34070122, -0.25452897, -0.25891387, -0.09419047, -0.07291724, 
-0.24225563, 0.4279058, -0.11666133, -0.07513852, 0.0018023223, 0.448046, 0.20821133, 0.15960689, 0.228467, -0.026781378, 0.043798983, 0.11893418, -0.46809036, 0.2198233, -0.23621814, -0.1333021, 0.0064931093, 0.09569369, -0.024951685, 0.041569363, -0.11953565, -0.09252381, 0.22401738, -0.36581153, -0.070181325, 0.27557525, 0.16817227, -0.26221034, 0.036853302, 0.122759596, 0.38257086, 0.08625116, -0.2152178, 0.11895082, -0.33421177, -0.027113844, -0.18887155, -0.29267368, 0.14328273, -0.077010274, 0.08095182, -0.09670088, -0.29266798, 0.21065508, -0.058506, -0.08751463, 0.4218339, -0.0031686411, -0.12668662, 0.13693617, 0.010775516, 0.03800761, -0.101201124, 0.26593548, 0.20864183, -0.28382388, 0.15653086, -0.12020002, -0.031083556, -0.09614674]}]|[{doc_similarity_rankings, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {pieceId -> -1, lshId -> -1548374770, isWordStart -> true, token -> Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., lshNeighbors -> [(-1719102856,0.2329717161223739),(1274183715,0.3181712969830674),(-612640902,0.31909423657258823),(1293373212,0.3225589844685982),(-1320876223,0.32312628638943774),(1634839239,0.39013800843493296),(1510101612,0.41026101952006294)], sentence -> 0}, [-0.008019538, -0.20794445, -0.22693227, -0.066497125, 0.13135312, 0.18650925, 0.26249522, -0.078561835, -0.108032554, -0.16549937, 0.23456018, -0.0029051038, -0.09654328, 0.1081486, -0.12367482, 0.48534602, 0.21623878, -0.45364743, 0.019192278, -0.02995625, -0.2504148, 0.050217465, 0.4518648, 0.32667267, 0.10702716, 0.0853897, -0.14142184, -0.03167035, 0.19115756, 0.24786223, 0.2914291, 0.05494155, 0.10942929, 0.24128707, -0.25258842, 0.06971938, -0.30623087, 0.009310224, 0.24930832, -0.21145473, -0.071628265, 0.16028066, 0.2130934, -0.15951225, -0.12180848, 0.41019738, 0.2232628, 0.035024345, -0.1233715, -0.09511672, -0.37004888, 0.3324171, 0.28777122, 
0.19508287, -0.0010060468, 0.033361237, -0.15824829, 0.23507485, -0.07277971, -0.10862046, -0.1266775, -0.20873529, -0.016099958, -0.054443464, 0.03418388, -0.15287445, 0.08516955, -0.16009997, -0.13519952, 0.06594492, -0.09619981, 0.13498701, 0.17068765, -0.27748087, -0.27755773, 0.050838053, -0.60263795, -0.110697724, 0.29348016, 0.43039024, -0.10951068, 0.18981335, 0.045793816, 0.20932128, -0.013209117, -0.045443077, -0.04795611, -0.09545004, 0.18174982, 0.27740657, -0.20092762, -0.3743948, 0.070061244, 0.021248285, -0.10003458, -0.012923969, -0.015641354, -0.069142535, -0.18447016, -0.20632346, 0.061669473, -0.26073086, -0.13162859, 0.2652085, -0.020487502, -0.20239983, -0.019889005, 0.28115276, 0.06268703, -0.10772411, -0.17282209, 0.42130953, 0.3386013, 0.01063591, 0.012493721, 0.18252686, 0.11687154, -0.289261, 0.41447076, -0.3171783, -0.007732441, -0.12554668, 0.12227292, 0.14982867, -0.1994947, 0.2653794, 0.13194634, 0.27858624, 0.18165465, 0.09452338, -0.036926605, 0.14094561, -0.10714303, 0.14023595, 0.21794924, 0.11931853, 0.0083964225, -0.34428638, -0.2060787, 0.2581659, 0.34725285, 0.17249854, -0.035370246, 0.18123226, 0.115042284, 0.21278691, 0.16332792, -0.39327794, 0.043199714, 0.3466151, 0.10552055, 0.1811821, -0.094810925, -0.25144812, -0.2369235, -0.098048225, 0.053571597, -0.3044636, -0.117047854, 0.37140828, 0.03258837, 0.010649239, -0.1384685, -0.2107958, -0.04297339, -0.11824401, 0.022123057, 0.09906902, -0.08942813, -0.40615627, -0.09174467, -0.54138595, -0.12516883, 0.20398387, -0.3002764, 0.23201002, -0.2881544, 0.09412716, 0.38156128, 0.027156925, 0.008762595, -0.18784012, -0.022351125, 0.12036052, 0.3264586, 0.24900058, -0.39910597, 0.079319164, 0.15601031, 0.23772217, 0.13825494, -0.04249709, -0.11666459, 0.12905037, -0.19663046, 0.1725472, -0.22609569, 0.18567988, -0.2612912, -0.21441153, 0.29490396, -0.4131635, -0.044802688, 0.07255378, 0.25608072, 0.004686255, -0.05354772, -0.096422195, 0.10385672, 0.16806516, 0.16490765, 
-0.40542477, 0.27064338, -0.036462914, -0.031986397, -0.029910486, 0.1873078, 0.25759354, 0.10744408, -0.38749996, -0.13174862, 0.09937129, 0.26613194, -0.22159924, 0.18355416, -0.2635393, -0.37645352, -0.12610617, 0.20294958, 0.21151984, 0.16929722, -0.26906973, 0.17143078, -0.10153495, -0.42571825, -0.36661187, -0.102374844, 0.22459105, 0.19933926, 0.19093192, 0.24899375, 0.014875878, 0.12345995, 0.17558807, 0.16242947, -0.14835785, 0.172079, -0.36210755, -0.040536694, -0.2759419, -0.20727508, -0.23599353, 0.38549605, -0.20905586, 0.23385757, 0.40176284, -0.31578505, -0.086200155, 0.14503288, 0.087825194, 0.05920683, -0.11740758, 0.21260233, 0.19665754, -0.10235794, 0.258057, -0.014308026, 0.25915918, 0.16674384, 0.10508859, 0.11316951, 0.12858894, -0.15207034, 0.044372506, 0.010741287, -0.026460659, -0.23678094, -0.13959345, 0.22005278, -0.046174966, 0.029175762, -0.15699074, -0.07535748, 0.009295726, 0.39943105, -0.3637203, 0.26161516, 0.08734801, 0.14933671, -0.2341384, -0.21599188, 0.072215594, 0.18299405, -0.39975247, 0.0035565842, 0.14676017, 0.10756498, 0.22721377, 0.25173554, -0.006123567, -0.094908014, 0.4766384, -0.14474754, -0.13256605, 0.2748037, -0.24513212, -0.2763986, 0.2767801, -0.02837765, 0.3070326, 0.14310399, 0.02803333, 0.07966456, -0.59598786, 0.0634747, -0.46667746, 2.0662788E-4, 0.027827349, -0.09846748, -0.2049167, 0.14928561, 0.28899723, -0.24984509, -0.027508143, 0.21127011, 0.07993328, -0.10535655, 0.49076062, -0.0016710946, 0.22241512, -0.0761468, 0.26699233, -0.2132116, 0.26694208, -0.2699361, -0.102920294, 0.023504116, 0.08591295, 0.047616553, -0.03864741, -0.32397145, 0.22423643, 3.750706E-4, -0.052303057, -0.051092364, 0.10266755, 0.007874346, 0.056537174, 0.052040357, 0.3099191, 0.2147011, -0.041306116, -0.37230852, -0.02358203, -0.093814276, 0.06317558, 0.021807505, -0.0052324715, 0.44351, -0.084359095, 0.02457905, -0.120074786, 0.25709224, 0.20523037, 0.13982376, 0.12155931, 0.085087486, 0.15450434, -0.033903852, -0.008247593, 
-0.14950626, -0.22513278, -0.28045303, 0.20739494, -0.22394833, -0.16329561, 0.18090749, 0.21472976, -0.119760044, 0.122836865, 0.31932604, 0.10612227, -0.17157494, 0.2600004, -0.09567128, 0.08662764, 0.31872585, -0.032042537, 0.16470939, 0.50351644, 0.21122353, -0.36921608, -0.033876106, -0.22228853, -0.01841526, 0.26909, -0.15758824, 0.18417774, 0.3548506, 0.31725127, 0.43062854, 0.01227282, -0.12594888, 0.08146904, 0.21303466, 0.022855874, -0.1579743, -0.15339917, 0.2802583, 0.056297414, -0.14544499, -0.03212101, -0.14632796, 0.014579217, -0.13752632, -0.41132227, 0.04383321, 0.21508393, -0.4907415, 0.094107084, -0.31429338, 0.046641294, -0.2345635, 0.21059625, -0.24306759, -0.1063646, 0.3705863, -0.09811044, 0.0605867, -0.18104103, -0.14497119, 0.029493399, 0.03492019, -0.029783124, -0.013816307, 0.35751185, -0.10965113, 0.041228097, 0.013336186, 0.20357218, -0.069435954, 0.18225056, 0.027941927, -0.15454903, -0.3813045, 0.15006964, -0.19400018, -0.43550804, -0.36777005, 0.36024705, -0.1553815, -0.25671393, -0.2242871, -0.25538933, 0.05633095, 0.17045896, 0.46840438, -0.40827343, -0.06883438, 0.49420673, -0.054230284, -0.1784511, 0.3003717, 0.20356534, -0.30255544, 0.32934627, 0.28107437, -0.043982994, 0.030254403, 0.50613445, 0.106514424, 0.16891049, -0.21837108, 0.43107754, -0.22679833, 0.3222671, -0.14380512, -0.22103326, -0.20031895, -0.0053463904, 0.31086656, 0.18283913, -0.4164982, -0.10497948, 0.05488763, 0.3499168, -0.38303393, -0.0706, 0.02012401, -0.34419113, 0.14620271, 0.06066406, 0.23910847, -0.36826327, -0.0031818626, 0.39364952, -0.31577078, 0.13167827, 0.32565027, 0.09713475, 0.35657045, -0.031938132, 0.0063300836, 0.058500852, -0.24127181, -0.009309541, 0.1374363, 0.55266875, 0.15968034, -0.386893, 0.09495158, 0.24035685, -0.16255692, 0.29488644, -0.079560675, -0.06405346, 0.25573367, -0.031631544, 0.15875565, -0.089678556, -0.22789505, -0.30908328, 0.37180826, -0.20641276, -0.10318238, -0.16293754, -0.08793884, -0.13343672, 0.06730986, 
-0.36669317, 0.3368156, 0.12109322, -0.19045906, -0.08927799, -0.06881458, -0.15314971, -0.2044369, -0.2583901, 0.4240869, -0.16738205, -0.45303494, 0.2660113, 0.036534272, 0.34492457, 0.03064578, 0.1161642, -0.07246866, 0.14535797, 0.094102405, -0.13236499, 0.281681, 0.06910245, -0.5319742, -0.14167535, -0.2213673, 0.10062441, 0.1823384, -0.32449323, 0.045420066, 0.020613493, 0.14170578, 0.054557852, -0.081435464, -0.08237418, 0.39456445, 0.22010934, 0.27721396, 0.079628035, 0.21498804, -0.030950392, -0.3197281, 0.047628313, 0.06835473, -0.19502498, 0.4497397, -0.07986492, -0.37665737, -0.06270121, 0.40801352, 0.087262504, 0.013305801, -0.051705398, 0.20942184, 0.1442438, -0.12746692, 0.1790049, -0.019305615, -0.118932344, -0.10855935, 0.07830948, -0.213931, 0.03763846, -0.16343854, -0.022269689, -0.22339572, 0.03815981, -0.21411316, 0.26194704, -0.32970122, 0.09410265, 0.07144776, 0.28778374, -0.3441213, -0.16373956, -0.07426902, 0.160929, 0.27774033, 0.3449314, 0.032607112, 0.01710335, -0.1717972, -0.2522701, 0.056452833, -0.20888108, 0.15573634, 0.07741951, 0.21761471, -0.3139925, -0.18512204, 0.19587885, -0.093458064, -0.15175745, 0.39264217, 0.24714977, 0.23077905, 0.011358897, 0.2595526, 0.011583311, -0.18557008, -0.14353761, -0.25729296, 0.07521876, -0.096184544, -0.040979125, -0.05232608, -0.11970768, -0.21794261, -0.15748894, 0.14131306, 0.12565142, 0.013618528, -0.042706285, -0.009846034, -0.26986337, 0.32127848, 0.012022808, 0.053931307, -0.053089615, 0.04204643, -0.14650013, 0.2337747, 0.22185645, 0.084541485, -0.18815027, -0.05060835, -0.26873425, -0.34384862, 0.04447537, 0.14540635, 0.10909279, -0.07547726, -0.24596812, 0.01701418, -0.12477274, 0.17758216, -0.003989822, -0.15708753, -0.10013234, -0.060486514, -0.023525922, 0.07570741, -0.2043602, -0.20318846, -0.09272064, -0.08396323, -0.06631228, 0.34927878, -0.042691473, 0.30072594, -0.14322798, -0.0010577674, -0.16078945, 0.10943518, -0.065818414, 0.048736267, 0.27629223, -0.46065903, -0.15741968, 
-0.00574388, -0.20248723, -0.13032655, -0.075273484, -0.019586748, 0.22265866, -0.3558014, 0.2260905, -0.096864544, 0.18844095, -0.07203457, -0.2636315, -0.14861545, 0.0066989767, 0.24385743, -0.34070122, -0.25452897, -0.25891387, -0.09419047, -0.07291724, -0.24225563, 0.4279058, -0.11666133, -0.07513852, 0.0018023223, 0.448046, 0.20821133, 0.15960689, 0.228467, -0.026781378, 0.043798983, 0.11893418, -0.46809036, 0.2198233, -0.23621814, -0.1333021, 0.0064931093, 0.09569369, -0.024951685, 0.041569363, -0.11953565, -0.09252381, 0.22401738, -0.36581153, -0.070181325, 0.27557525, 0.16817227, -0.26221034, 0.036853302, 0.122759596, 0.38257086, 0.08625116, -0.2152178, 0.11895082, -0.33421177, -0.027113844, -0.18887155, -0.29267368, 0.14328273, -0.077010274, 0.08095182, -0.09670088, -0.29266798, 0.21065508, -0.058506, -0.08751463, 0.4218339, -0.0031686411, -0.12668662, 0.13693617, 0.010775516, 0.03800761, -0.101201124, 0.26593548, 0.20864183, -0.28382388, 0.15653086, -0.12020002, -0.031083556, -0.09614674]}]|-1548374770 |[(-1719102856,0.2329717161223739),(1274183715,0.3181712969830674),(-612640902,0.31909423657258823),(1293373212,0.3225589844685982),(-1320876223,0.32312628638943774),(1634839239,0.39013800843493296),(1510101612,0.41026101952006294)] |-1719102856 |0.2329717161223739 |\n", + "|Eighth document, the warmest place in France is the French Riviera coast in Southern France. 
|[{document, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {sentence -> 0}, []}] |[{document, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {sentence -> 0}, []}] |[{token, 0, 5, Eighth, {sentence -> 0}, []}, {token, 7, 14, document, {sentence -> 0}, []}, {token, 15, 15, ,, {sentence -> 0}, []}, {token, 17, 19, the, {sentence -> 0}, []}, {token, 21, 27, warmest, {sentence -> 0}, []}, {token, 29, 33, place, {sentence -> 0}, []}, {token, 35, 36, in, {sentence -> 0}, []}, {token, 38, 43, France, {sentence -> 0}, []}, {token, 45, 46, is, {sentence -> 0}, []}, {token, 48, 50, the, {sentence -> 0}, []}, {token, 52, 57, French, {sentence -> 0}, []}, {token, 59, 65, Riviera, {sentence -> 0}, []}, {token, 67, 71, coast, {sentence -> 0}, []}, {token, 73, 74, in, {sentence -> 0}, []}, {token, 76, 83, Southern, {sentence -> 0}, []}, {token, 85, 90, France, {sentence -> 0}, []}, {token, 91, 91, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {sentence -> 0, token -> Eighth document, the warmest place in France is the French Riviera coast in Southern France., pieceId -> -1, isWordStart -> true}, [-0.016810948, -0.2047661, -0.2230267, -0.06613865, 0.13198656, 0.1876761, 0.26010045, -0.07860814, -0.08750986, -0.16776286, 0.2276038, -0.0048067835, -0.09549664, 0.09228111, -0.13566737, 0.49211678, 0.21506462, -0.46096098, 0.02290908, -0.030406667, -0.26267675, 0.06007962, 0.45803392, 0.32293695, 0.114533946, 0.08458777, -0.14395903, -0.04171004, 0.19107072, 0.24035561, 0.30237022, 0.06198846, 0.1061096, 0.23064552, -0.24602959, 0.06736374, -0.30546662, 0.018916616, 0.23576747, -0.20862201, -0.07291601, 0.16750018, 0.21796244, -0.15457813, -0.10267717, 0.4108639, 0.22948943, 0.024940656, -0.12781665, -0.09829003, -0.36444336, 0.33999962, 0.2979673, 0.18795507, 0.0058128177, 
0.03166188, -0.16602582, 0.23594972, -0.08020127, -0.10588683, -0.13476343, -0.20074773, -0.018953657, -0.049433485, 0.024178218, -0.15827763, 0.08096834, -0.16638803, -0.13942112, 0.066614725, -0.088811725, 0.13083075, 0.16235103, -0.28993893, -0.27632293, 0.050675448, -0.60434693, -0.11318788, 0.28318927, 0.4251388, -0.115691744, 0.20609955, 0.048247, 0.22572593, -0.009609341, -0.047968842, -0.063365586, -0.09246086, 0.17824914, 0.27709022, -0.19850928, -0.3818279, 0.076068364, 0.021581797, -0.10875837, -0.012900721, -0.02160152, -0.07872134, -0.18224761, -0.19908729, 0.06501623, -0.26073945, -0.13469686, 0.26163143, -0.016409602, -0.19890097, -0.027429793, 0.29457363, 0.06711283, -0.11308402, -0.16379794, 0.42119157, 0.33880904, 0.006702892, 0.011503242, 0.17496394, 0.11048721, -0.29479164, 0.41352564, -0.31115502, -0.014682422, -0.12465822, 0.122917116, 0.15077394, -0.20986927, 0.26472732, 0.12745324, 0.28114912, 0.17852971, 0.09809405, -0.021323938, 0.1425695, -0.116022065, 0.15450992, 0.21419188, 0.10874285, -0.0035544126, -0.33577085, -0.21062046, 0.2557292, 0.365727, 0.17186679, -0.034007914, 0.18571931, 0.113826446, 0.2041972, 0.16858879, -0.39305702, 0.047708813, 0.3407281, 0.10008117, 0.1666761, -0.08277356, -0.2655564, -0.2390036, -0.08099706, 0.045763668, -0.30731857, -0.11569927, 0.37365586, 0.01956875, -0.001237718, -0.14353643, -0.21356548, -0.03486019, -0.12086741, 0.041639853, 0.104439534, -0.092656165, -0.40179342, -0.10187488, -0.5471143, -0.12290574, 0.2087677, -0.30770865, 0.22984694, -0.29476187, 0.096242994, 0.384363, 0.026361842, 5.725033E-4, -0.19238421, -0.028222995, 0.113249354, 0.32910722, 0.23294336, -0.3986335, 0.09978031, 0.14896542, 0.23949988, 0.1478057, -0.032381695, -0.12643869, 0.115882315, -0.19580248, 0.19138065, -0.21877433, 0.19352977, -0.2535542, -0.20547706, 0.27981648, -0.4005575, -0.046523586, 0.09361415, 0.2590049, 0.007964988, -0.05637875, -0.08670184, 0.099779375, 0.18480189, 0.14401811, -0.39154035, 0.2751374, 
-0.03074833, -0.024801074, -0.021942627, 0.18419088, 0.2608532, 0.11138497, -0.40339246, -0.13331044, 0.1154039, 0.2723197, -0.2262617, 0.16250908, -0.26903376, -0.37250632, -0.13596842, 0.21473151, 0.21515769, 0.16038711, -0.27683273, 0.17736936, -0.08310452, -0.42004582, -0.36562136, -0.09779574, 0.2411314, 0.20187439, 0.18733725, 0.2641376, 0.017855817, 0.11153809, 0.17190574, 0.16802579, -0.16192591, 0.18056063, -0.35846385, -0.049806055, -0.26109663, -0.19828144, -0.23734608, 0.3932265, -0.22376418, 0.22467436, 0.38326305, -0.31592938, -0.081319235, 0.1556078, 0.08884176, 0.06665615, -0.10425053, 0.20910178, 0.17956693, -0.10533009, 0.24591704, -0.0038596322, 0.2617894, 0.18107952, 0.09749653, 0.12563631, 0.12635578, -0.1391452, 0.041367147, 0.008686017, -0.029572926, -0.24497731, -0.14718057, 0.21611099, -0.03282076, 0.03800035, -0.16256967, -0.09790739, -0.0014242514, 0.4040815, -0.37026706, 0.26380262, 0.09079506, 0.14612387, -0.24610792, -0.1947632, 0.07052605, 0.18185152, -0.40230885, -0.007907403, 0.15706225, 0.111771695, 0.23515886, 0.25010493, -0.01306646, -0.10541734, 0.4784001, -0.14441222, -0.13855816, 0.27101827, -0.24725674, -0.2802681, 0.27687818, -0.03339839, 0.3054517, 0.130619, 0.037950855, 0.077942155, -0.5888695, 0.07351768, -0.46685404, -0.0040499587, 0.025250355, -0.0859045, -0.20816529, 0.14463536, 0.28113353, -0.25993484, -0.040858176, 0.21026222, 0.08361061, -0.10720821, 0.48898703, -2.2704061E-4, 0.2221854, -0.08727743, 0.2570495, -0.20949613, 0.25267476, -0.27079397, -0.09415934, 0.006743326, 0.09153167, 0.053167544, -0.03806283, -0.3219283, 0.22196239, 0.0017300758, -0.057169266, -0.05333144, 0.0975005, 0.005131098, 0.046497438, 0.0623432, 0.321922, 0.21922378, -0.03954325, -0.37501228, -0.0155652305, -0.09921332, 0.06552464, 0.02618605, -0.014657838, 0.4420349, -0.08608749, 0.028822435, -0.132511, 0.26999778, 0.20299375, 0.1389036, 0.12914367, 0.07623987, 0.14375348, -0.052015696, -0.018790662, -0.13280135, -0.21133803, -0.27303988, 
0.20791331, -0.22594361, -0.17630367, 0.18414178, 0.21877678, -0.12745881, 0.13150722, 0.32212988, 0.11044695, -0.16207896, 0.27268118, -0.099328026, 0.08867667, 0.31989032, -0.015583255, 0.16372082, 0.51543236, 0.20965122, -0.37761936, -0.029446285, -0.22624405, -0.0051141595, 0.26030782, -0.16369255, 0.1629463, 0.36997578, 0.31189638, 0.42942852, -0.001673679, -0.1205522, 0.08194525, 0.19638115, 0.011460368, -0.16485056, -0.16228262, 0.2740312, 0.05017268, -0.15451404, -0.03512774, -0.13452254, 0.028118514, -0.1329012, -0.4101204, 0.03879618, 0.21135166, -0.4898482, 0.091599375, -0.3092855, 0.06110656, -0.22477996, 0.21057707, -0.23781657, -0.10551279, 0.37984648, -0.09410357, 0.04732256, -0.18294896, -0.15246752, 0.021639392, 0.02237629, -0.017262453, -0.026302386, 0.3608514, -0.108894534, 0.0474644, 0.024507962, 0.1971899, -0.06268896, 0.19199464, 0.032802872, -0.13034374, -0.3862199, 0.15018144, -0.20086884, -0.4251439, -0.3633213, 0.35504803, -0.15731166, -0.25986132, -0.22435285, -0.26186633, 0.070692174, 0.16790512, 0.4691279, -0.39221457, -0.07939644, 0.4884994, -0.045310415, -0.18310241, 0.29507643, 0.2049564, -0.31412512, 0.3319548, 0.26859912, -0.046015155, 0.017437246, 0.50600624, 0.13420314, 0.16892372, -0.2181584, 0.43435982, -0.22135681, 0.32880262, -0.15483025, -0.22297119, -0.19834407, -0.013414336, 0.3219674, 0.1825749, -0.42209828, -0.10438974, 0.037324775, 0.361452, -0.37822384, -0.065436505, 0.0033893238, -0.33716473, 0.14011075, 0.061164405, 0.22412765, -0.37722385, -0.003241484, 0.39605972, -0.32297456, 0.121618845, 0.31544292, 0.098115414, 0.3545687, -0.040462196, -0.0015956911, 0.05998545, -0.2341839, -0.012709303, 0.12954898, 0.55812806, 0.15784661, -0.38522407, 0.10765506, 0.24407583, -0.16438684, 0.29567552, -0.087225564, -0.05044142, 0.26510367, -0.03295447, 0.1525916, -0.09594085, -0.23619364, -0.305938, 0.369586, -0.2030003, -0.10078699, -0.158635, -0.09669019, -0.13501357, 0.06365931, -0.3862268, 0.33531812, 0.13119636, -0.19266993, 
-0.08529151, -0.068182945, -0.14552347, -0.20925495, -0.26174715, 0.42169508, -0.15332408, -0.4422107, 0.26490703, 0.044914585, 0.3462565, 0.028742079, 0.112568825, -0.049105942, 0.14644153, 0.10022157, -0.1353474, 0.2859512, 0.05943501, -0.5361389, -0.14724284, -0.22212745, 0.09529675, 0.19780394, -0.3280398, 0.03847211, 0.047032848, 0.13924433, 0.04658549, -0.087256454, -0.06858529, 0.39810196, 0.2195755, 0.27559587, 0.080366105, 0.224659, -0.037330583, -0.32662335, 0.049084876, 0.08356128, -0.2064612, 0.44953158, -0.07670108, -0.3735964, -0.07003661, 0.40409234, 0.09436887, 0.002322631, -0.058938935, 0.21089995, 0.15257117, -0.11534224, 0.1868882, -0.027790006, -0.13924241, -0.11449168, 0.07226305, -0.2269321, 0.05748148, -0.15779555, -0.004239961, -0.2076957, 0.025448453, -0.20258994, 0.26121607, -0.3375632, 0.09614588, 0.0837002, 0.2889494, -0.35512874, -0.16446145, -0.07327044, 0.162409, 0.27919483, 0.33874825, 0.03656232, 0.020300211, -0.18202537, -0.24737185, 0.062216822, -0.20302315, 0.13892165, 0.07089765, 0.22756256, -0.33009684, -0.18666261, 0.20215912, -0.09404198, -0.14015506, 0.3908819, 0.23564205, 0.2299575, 0.0022593169, 0.25940043, 0.010104028, -0.17387737, -0.12020657, -0.25978172, 0.069076784, -0.09353954, -0.057409134, -0.048858702, -0.115044445, -0.19313143, -0.15462089, 0.14306325, 0.13297117, 0.018673927, -0.05574753, -0.0076537174, -0.28154808, 0.31979483, 0.026106903, 0.04603346, -0.059027776, 0.041025206, -0.13605414, 0.23317498, 0.21174069, 0.078041025, -0.17607999, -0.051855393, -0.27796885, -0.35570046, 0.04882217, 0.13480361, 0.11094927, -0.07341316, -0.2513282, 0.017097535, -0.12939982, 0.1765655, 0.0043726726, -0.1646482, -0.09244843, -0.07229631, -0.036124315, 0.0713724, -0.20348924, -0.20200686, -0.099306956, -0.07727608, -0.06878283, 0.34491593, -0.04862456, 0.288199, -0.14932868, -0.011248313, -0.1619775, 0.12771314, -0.067322426, 0.050312262, 0.26488206, -0.45438048, -0.16701354, -0.024269667, -0.20508873, -0.13706926, 
-0.081109755, -0.032133017, 0.22538628, -0.35670912, 0.24337423, -0.11686166, 0.17959888, -0.07400869, -0.26138356, -0.14498967, 0.002314695, 0.2509966, -0.33920595, -0.24641253, -0.26611453, -0.093208805, -0.0814982, -0.25990567, 0.4346015, -0.12232823, -0.060464166, -0.0034285442, 0.44883457, 0.2001189, 0.1663645, 0.21496214, -0.038633876, 0.04551105, 0.11791142, -0.4636027, 0.2206974, -0.22927228, -0.118581764, 0.018132765, 0.09559669, -0.022403285, 0.03678469, -0.118074425, -0.084937155, 0.22283584, -0.37808853, -0.05847166, 0.27044755, 0.16123472, -0.26384753, 0.029716926, 0.12959477, 0.39335707, 0.088695444, -0.22262987, 0.11189321, -0.33450133, -0.040625323, -0.19362892, -0.29486585, 0.14436631, -0.07635042, 0.0817869, -0.09728282, -0.2813908, 0.21378614, -0.054669898, -0.09780386, 0.41780302, -0.0015549128, -0.1261193, 0.13937768, 0.0039213966, 0.021901237, -0.10023584, 0.2714901, 0.20122135, -0.27988607, 0.14680666, -0.13224062, -0.03425929, -0.09730296]}] |[{doc_similarity_rankings, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {pieceId -> -1, lshId -> -1719102856, isWordStart -> true, token -> Eighth document, the warmest place in France is the French Riviera coast in Southern France., lshNeighbors -> [(-1548374770,0.2329717161223739),(-1320876223,0.2761524746260818),(1274183715,0.28519768414650126),(1293373212,0.2876650539432857),(-612640902,0.2991777399965483),(1634839239,0.3901714913624425),(1510101612,0.4043799951515284)], sentence -> 0}, [-0.016810948, -0.2047661, -0.2230267, -0.06613865, 0.13198656, 0.1876761, 0.26010045, -0.07860814, -0.08750986, -0.16776286, 0.2276038, -0.0048067835, -0.09549664, 0.09228111, -0.13566737, 0.49211678, 0.21506462, -0.46096098, 0.02290908, -0.030406667, -0.26267675, 0.06007962, 0.45803392, 0.32293695, 0.114533946, 0.08458777, -0.14395903, -0.04171004, 0.19107072, 0.24035561, 0.30237022, 0.06198846, 0.1061096, 0.23064552, -0.24602959, 0.06736374, -0.30546662, 
0.018916616, 0.23576747, -0.20862201, -0.07291601, 0.16750018, 0.21796244, -0.15457813, -0.10267717, 0.4108639, 0.22948943, 0.024940656, -0.12781665, -0.09829003, -0.36444336, 0.33999962, 0.2979673, 0.18795507, 0.0058128177, 0.03166188, -0.16602582, 0.23594972, -0.08020127, -0.10588683, -0.13476343, -0.20074773, -0.018953657, -0.049433485, 0.024178218, -0.15827763, 0.08096834, -0.16638803, -0.13942112, 0.066614725, -0.088811725, 0.13083075, 0.16235103, -0.28993893, -0.27632293, 0.050675448, -0.60434693, -0.11318788, 0.28318927, 0.4251388, -0.115691744, 0.20609955, 0.048247, 0.22572593, -0.009609341, -0.047968842, -0.063365586, -0.09246086, 0.17824914, 0.27709022, -0.19850928, -0.3818279, 0.076068364, 0.021581797, -0.10875837, -0.012900721, -0.02160152, -0.07872134, -0.18224761, -0.19908729, 0.06501623, -0.26073945, -0.13469686, 0.26163143, -0.016409602, -0.19890097, -0.027429793, 0.29457363, 0.06711283, -0.11308402, -0.16379794, 0.42119157, 0.33880904, 0.006702892, 0.011503242, 0.17496394, 0.11048721, -0.29479164, 0.41352564, -0.31115502, -0.014682422, -0.12465822, 0.122917116, 0.15077394, -0.20986927, 0.26472732, 0.12745324, 0.28114912, 0.17852971, 0.09809405, -0.021323938, 0.1425695, -0.116022065, 0.15450992, 0.21419188, 0.10874285, -0.0035544126, -0.33577085, -0.21062046, 0.2557292, 0.365727, 0.17186679, -0.034007914, 0.18571931, 0.113826446, 0.2041972, 0.16858879, -0.39305702, 0.047708813, 0.3407281, 0.10008117, 0.1666761, -0.08277356, -0.2655564, -0.2390036, -0.08099706, 0.045763668, -0.30731857, -0.11569927, 0.37365586, 0.01956875, -0.001237718, -0.14353643, -0.21356548, -0.03486019, -0.12086741, 0.041639853, 0.104439534, -0.092656165, -0.40179342, -0.10187488, -0.5471143, -0.12290574, 0.2087677, -0.30770865, 0.22984694, -0.29476187, 0.096242994, 0.384363, 0.026361842, 5.725033E-4, -0.19238421, -0.028222995, 0.113249354, 0.32910722, 0.23294336, -0.3986335, 0.09978031, 0.14896542, 0.23949988, 0.1478057, -0.032381695, -0.12643869, 0.115882315, -0.19580248, 
0.19138065, -0.21877433, 0.19352977, -0.2535542, -0.20547706, 0.27981648, -0.4005575, -0.046523586, 0.09361415, 0.2590049, 0.007964988, -0.05637875, -0.08670184, 0.099779375, 0.18480189, 0.14401811, -0.39154035, 0.2751374, -0.03074833, -0.024801074, -0.021942627, 0.18419088, 0.2608532, 0.11138497, -0.40339246, -0.13331044, 0.1154039, 0.2723197, -0.2262617, 0.16250908, -0.26903376, -0.37250632, -0.13596842, 0.21473151, 0.21515769, 0.16038711, -0.27683273, 0.17736936, -0.08310452, -0.42004582, -0.36562136, -0.09779574, 0.2411314, 0.20187439, 0.18733725, 0.2641376, 0.017855817, 0.11153809, 0.17190574, 0.16802579, -0.16192591, 0.18056063, -0.35846385, -0.049806055, -0.26109663, -0.19828144, -0.23734608, 0.3932265, -0.22376418, 0.22467436, 0.38326305, -0.31592938, -0.081319235, 0.1556078, 0.08884176, 0.06665615, -0.10425053, 0.20910178, 0.17956693, -0.10533009, 0.24591704, -0.0038596322, 0.2617894, 0.18107952, 0.09749653, 0.12563631, 0.12635578, -0.1391452, 0.041367147, 0.008686017, -0.029572926, -0.24497731, -0.14718057, 0.21611099, -0.03282076, 0.03800035, -0.16256967, -0.09790739, -0.0014242514, 0.4040815, -0.37026706, 0.26380262, 0.09079506, 0.14612387, -0.24610792, -0.1947632, 0.07052605, 0.18185152, -0.40230885, -0.007907403, 0.15706225, 0.111771695, 0.23515886, 0.25010493, -0.01306646, -0.10541734, 0.4784001, -0.14441222, -0.13855816, 0.27101827, -0.24725674, -0.2802681, 0.27687818, -0.03339839, 0.3054517, 0.130619, 0.037950855, 0.077942155, -0.5888695, 0.07351768, -0.46685404, -0.0040499587, 0.025250355, -0.0859045, -0.20816529, 0.14463536, 0.28113353, -0.25993484, -0.040858176, 0.21026222, 0.08361061, -0.10720821, 0.48898703, -2.2704061E-4, 0.2221854, -0.08727743, 0.2570495, -0.20949613, 0.25267476, -0.27079397, -0.09415934, 0.006743326, 0.09153167, 0.053167544, -0.03806283, -0.3219283, 0.22196239, 0.0017300758, -0.057169266, -0.05333144, 0.0975005, 0.005131098, 0.046497438, 0.0623432, 0.321922, 0.21922378, -0.03954325, -0.37501228, -0.0155652305, -0.09921332, 
0.06552464, 0.02618605, -0.014657838, 0.4420349, -0.08608749, 0.028822435, -0.132511, 0.26999778, 0.20299375, 0.1389036, 0.12914367, 0.07623987, 0.14375348, -0.052015696, -0.018790662, -0.13280135, -0.21133803, -0.27303988, 0.20791331, -0.22594361, -0.17630367, 0.18414178, 0.21877678, -0.12745881, 0.13150722, 0.32212988, 0.11044695, -0.16207896, 0.27268118, -0.099328026, 0.08867667, 0.31989032, -0.015583255, 0.16372082, 0.51543236, 0.20965122, -0.37761936, -0.029446285, -0.22624405, -0.0051141595, 0.26030782, -0.16369255, 0.1629463, 0.36997578, 0.31189638, 0.42942852, -0.001673679, -0.1205522, 0.08194525, 0.19638115, 0.011460368, -0.16485056, -0.16228262, 0.2740312, 0.05017268, -0.15451404, -0.03512774, -0.13452254, 0.028118514, -0.1329012, -0.4101204, 0.03879618, 0.21135166, -0.4898482, 0.091599375, -0.3092855, 0.06110656, -0.22477996, 0.21057707, -0.23781657, -0.10551279, 0.37984648, -0.09410357, 0.04732256, -0.18294896, -0.15246752, 0.021639392, 0.02237629, -0.017262453, -0.026302386, 0.3608514, -0.108894534, 0.0474644, 0.024507962, 0.1971899, -0.06268896, 0.19199464, 0.032802872, -0.13034374, -0.3862199, 0.15018144, -0.20086884, -0.4251439, -0.3633213, 0.35504803, -0.15731166, -0.25986132, -0.22435285, -0.26186633, 0.070692174, 0.16790512, 0.4691279, -0.39221457, -0.07939644, 0.4884994, -0.045310415, -0.18310241, 0.29507643, 0.2049564, -0.31412512, 0.3319548, 0.26859912, -0.046015155, 0.017437246, 0.50600624, 0.13420314, 0.16892372, -0.2181584, 0.43435982, -0.22135681, 0.32880262, -0.15483025, -0.22297119, -0.19834407, -0.013414336, 0.3219674, 0.1825749, -0.42209828, -0.10438974, 0.037324775, 0.361452, -0.37822384, -0.065436505, 0.0033893238, -0.33716473, 0.14011075, 0.061164405, 0.22412765, -0.37722385, -0.003241484, 0.39605972, -0.32297456, 0.121618845, 0.31544292, 0.098115414, 0.3545687, -0.040462196, -0.0015956911, 0.05998545, -0.2341839, -0.012709303, 0.12954898, 0.55812806, 0.15784661, -0.38522407, 0.10765506, 0.24407583, -0.16438684, 0.29567552, 
-0.087225564, -0.05044142, 0.26510367, -0.03295447, 0.1525916, -0.09594085, -0.23619364, -0.305938, 0.369586, -0.2030003, -0.10078699, -0.158635, -0.09669019, -0.13501357, 0.06365931, -0.3862268, 0.33531812, 0.13119636, -0.19266993, -0.08529151, -0.068182945, -0.14552347, -0.20925495, -0.26174715, 0.42169508, -0.15332408, -0.4422107, 0.26490703, 0.044914585, 0.3462565, 0.028742079, 0.112568825, -0.049105942, 0.14644153, 0.10022157, -0.1353474, 0.2859512, 0.05943501, -0.5361389, -0.14724284, -0.22212745, 0.09529675, 0.19780394, -0.3280398, 0.03847211, 0.047032848, 0.13924433, 0.04658549, -0.087256454, -0.06858529, 0.39810196, 0.2195755, 0.27559587, 0.080366105, 0.224659, -0.037330583, -0.32662335, 0.049084876, 0.08356128, -0.2064612, 0.44953158, -0.07670108, -0.3735964, -0.07003661, 0.40409234, 0.09436887, 0.002322631, -0.058938935, 0.21089995, 0.15257117, -0.11534224, 0.1868882, -0.027790006, -0.13924241, -0.11449168, 0.07226305, -0.2269321, 0.05748148, -0.15779555, -0.004239961, -0.2076957, 0.025448453, -0.20258994, 0.26121607, -0.3375632, 0.09614588, 0.0837002, 0.2889494, -0.35512874, -0.16446145, -0.07327044, 0.162409, 0.27919483, 0.33874825, 0.03656232, 0.020300211, -0.18202537, -0.24737185, 0.062216822, -0.20302315, 0.13892165, 0.07089765, 0.22756256, -0.33009684, -0.18666261, 0.20215912, -0.09404198, -0.14015506, 0.3908819, 0.23564205, 0.2299575, 0.0022593169, 0.25940043, 0.010104028, -0.17387737, -0.12020657, -0.25978172, 0.069076784, -0.09353954, -0.057409134, -0.048858702, -0.115044445, -0.19313143, -0.15462089, 0.14306325, 0.13297117, 0.018673927, -0.05574753, -0.0076537174, -0.28154808, 0.31979483, 0.026106903, 0.04603346, -0.059027776, 0.041025206, -0.13605414, 0.23317498, 0.21174069, 0.078041025, -0.17607999, -0.051855393, -0.27796885, -0.35570046, 0.04882217, 0.13480361, 0.11094927, -0.07341316, -0.2513282, 0.017097535, -0.12939982, 0.1765655, 0.0043726726, -0.1646482, -0.09244843, -0.07229631, -0.036124315, 0.0713724, -0.20348924, -0.20200686, 
-0.099306956, -0.07727608, -0.06878283, 0.34491593, -0.04862456, 0.288199, -0.14932868, -0.011248313, -0.1619775, 0.12771314, -0.067322426, 0.050312262, 0.26488206, -0.45438048, -0.16701354, -0.024269667, -0.20508873, -0.13706926, -0.081109755, -0.032133017, 0.22538628, -0.35670912, 0.24337423, -0.11686166, 0.17959888, -0.07400869, -0.26138356, -0.14498967, 0.002314695, 0.2509966, -0.33920595, -0.24641253, -0.26611453, -0.093208805, -0.0814982, -0.25990567, 0.4346015, -0.12232823, -0.060464166, -0.0034285442, 0.44883457, 0.2001189, 0.1663645, 0.21496214, -0.038633876, 0.04551105, 0.11791142, -0.4636027, 0.2206974, -0.22927228, -0.118581764, 0.018132765, 0.09559669, -0.022403285, 0.03678469, -0.118074425, -0.084937155, 0.22283584, -0.37808853, -0.05847166, 0.27044755, 0.16123472, -0.26384753, 0.029716926, 0.12959477, 0.39335707, 0.088695444, -0.22262987, 0.11189321, -0.33450133, -0.040625323, -0.19362892, -0.29486585, 0.14436631, -0.07635042, 0.0817869, -0.09728282, -0.2813908, 0.21378614, -0.054669898, -0.09780386, 0.41780302, -0.0015549128, -0.1261193, 0.13937768, 0.0039213966, 0.021901237, -0.10023584, 0.2714901, 0.20122135, -0.27988607, 0.14680666, -0.13224062, -0.03425929, -0.09730296]}] |-1719102856 |[(-1548374770,0.2329717161223739),(-1320876223,0.2761524746260818),(1274183715,0.28519768414650126),(1293373212,0.2876650539432857),(-612640902,0.2991777399965483),(1634839239,0.3901714913624425),(1510101612,0.4043799951515284)] |-1548374770 |0.2329717161223739 |\n", + 
"+------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+-------------------------+\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.annotator.similarity.document_similarity_ranker import *\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + 
"sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "sentence_embeddings = RoBertaSentenceEmbeddings.pretrained() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "# TODO add document_similarity_ranker with input col embeddings too\n", + "document_similarity_ranker = DocumentSimilarityRankerApproach() \\\n", + " .setInputCols(\"sentence_embeddings\") \\\n", + " .setOutputCol(\"doc_similarity_rankings\") \\\n", + " .setSimilarityMethod(\"brp\") \\\n", + " .setNumberOfNeighbours(10) \\\n", + " .setBucketLength(2.0) \\\n", + " .setNumHashTables(3) \\\n", + " .setVisibleDistances(True) \\\n", + " .setIdentityRanking(False)\n", + "\n", + "document_similarity_ranker_finisher = DocumentSimilarityRankerFinisher() \\\n", + " .setInputCols(\"doc_similarity_rankings\") \\\n", + " .setOutputCols(\n", + " \"finished_doc_similarity_rankings_id\",\n", + " \"finished_doc_similarity_rankings_neighbors\") \\\n", + " .setExtractNearestNeighbor(True)\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " sentence_detector,\n", + " tokenizer,\n", + " sentence_embeddings,\n", + " document_similarity_ranker,\n", + " document_similarity_ranker_finisher\n", + " ])\n", + "\n", + "model = pipeline.fit(data)\n", + "# TODO add write/read pipeline\n", + "model.transform(data).show(10, False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cde88af", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/test/annotator/similarity/doc_similarity_ranker_test.py b/python/test/annotator/similarity/doc_similarity_ranker_test.py index a16441b81994a9..3bb0712b29c45a 100644 --- a/python/test/annotator/similarity/doc_similarity_ranker_test.py +++ b/python/test/annotator/similarity/doc_similarity_ranker_test.py @@ -16,10 +16,10 @@ import pytest from pyspark.sql import SparkSession from sparknlp.annotator import * -from sparknlp.annotator.similarity.document_similarity_ranker import DocumentSimilarityRankerApproach +from sparknlp.annotator.similarity.document_similarity_ranker import * from sparknlp.base import * -# from test.util import SparkSessionForTest +from test.util import SparkSessionForTest @pytest.mark.slow @@ -28,8 +28,7 @@ def setUp(self): self.spark = SparkContextForTest.spark # FIXME rollback the setting up from utility class for test - # self.data = SparkSessionForTest.spark.createDataFrame([ - self.data = spark.createDataFrame([ + self.data = SparkSessionForTest.spark.createDataFrame([ ["First document, this is my first sentence. This is my second sentence."], ["Second document, this is my second sentence. 
This is my second sentence."], ["Third document, climate change is arguably one of the most pressing problems of our time."], @@ -94,27 +93,33 @@ def runTest(self): .setInputCols(["document"]) \ .setOutputCol("sentence_embeddings") - # TODO add document_similarity_ranker with input col embeddings too document_similarity_ranker = DocumentSimilarityRankerApproach() \ .setInputCols("sentence_embeddings") \ .setOutputCol("doc_similarity_rankings") \ - .setSimilarityMethod("mh") \ + .setSimilarityMethod("brp") \ .setNumberOfNeighbours(10) \ + .setBucketLength(2.0) \ .setNumHashTables(3) \ .setVisibleDistances(True) \ .setIdentityRanking(False) + document_similarity_ranker_finisher = DocumentSimilarityRankerFinisher() \ + .setInputCols("doc_similarity_rankings") \ + .setOutputCols( + "finished_doc_similarity_rankings_id", + "finished_doc_similarity_rankings_neighbors") \ + .setExtractNearestNeighbor(True) + pipeline = Pipeline(stages=[ document_assembler, sentence_detector, tokenizer, sentence_embeddings, - document_similarity_ranker - # TODO add document_similarity_ranker_finisher + document_similarity_ranker, + document_similarity_ranker_finisher ]) model = pipeline.fit(self.data) - # TODO add write/read pipeline transformed = model.transform(self.data) transformed.show() diff --git a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala index 3aeb7ccb9dd29b..2775d2c407f6cd 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -139,9 +139,7 @@ case class DocumentSimilarityRankerFinisher(override val uid: String) .withColumn( s"split_$neighborsColName", split(col(s"no_rounded_$neighborsColName"), ",")) - .withColumn( - "nearest_neighbor_id", - element_at(col(s"split_$neighborsColName"), 1).cast(IntegerType)) + 
.withColumn("nearest_neighbor_id", element_at(col(s"split_$neighborsColName"), 1).cast(IntegerType)) .withColumn("nearest_neighbor_distance", element_at(col(s"split_$neighborsColName"), 2)) else formatted diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index cee7d07fc80c96..415eb9895c1999 100644 --- a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -99,16 +99,16 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { // transformed.printSchema transformed - .select( - "text", - // "finished_sentence_embeddings", - "finished_doc_similarity_rankings_id", - "nearest_neighbor_id", - "nearest_neighbor_distance") +// .select( +// "text", +// // "finished_sentence_embeddings", +// "finished_doc_similarity_rankings_id", +// "nearest_neighbor_id", +// "nearest_neighbor_distance") .show(false) // correct if not empty as inclusive query points are at distance 0.0 from themselves - assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) +// assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) } "DocumentSimilarityRanker" should "should use min hash to rank document similarity" taggedAs SlowTest in { From f1a8e3819dbe06af7029d7a863edd16cdd9f9997 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Mon, 19 Jun 2023 21:29:57 +0200 Subject: [PATCH 24/26] stabilized tests for doc sim ranker --- .../similarity/doc_similarity_ranker_test.py | 113 ++------------- .../DocumentSimilarityRankerFinisher.scala | 4 +- .../DocumentSimilarityRankerTestSpec.scala | 135 +++++++++++++----- 3 files changed, 116 insertions(+), 136 deletions(-) diff --git a/python/test/annotator/similarity/doc_similarity_ranker_test.py 
b/python/test/annotator/similarity/doc_similarity_ranker_test.py index 3bb0712b29c45a..f9a93f4d12ee2d 100644 --- a/python/test/annotator/similarity/doc_similarity_ranker_test.py +++ b/python/test/annotator/similarity/doc_similarity_ranker_test.py @@ -14,20 +14,18 @@ import unittest import pytest -from pyspark.sql import SparkSession + from sparknlp.annotator import * from sparknlp.annotator.similarity.document_similarity_ranker import * from sparknlp.base import * - from test.util import SparkSessionForTest @pytest.mark.slow class DocumentSimilarityRankerTestSpec(unittest.TestCase): def setUp(self): - self.spark = SparkContextForTest.spark + self.spark = SparkSessionForTest.spark - # FIXME rollback the setting up from utility class for test self.data = SparkSessionForTest.spark.createDataFrame([ ["First document, this is my first sentence. This is my second sentence."], ["Second document, this is my second sentence. This is my second sentence."], @@ -39,45 +37,6 @@ def setUp(self): ["Eighth document, the warmest place in France is the French Riviera coast in Southern France."] ]).toDF("text") - def runTest(self): - document_assembler = DocumentAssembler() \ - .setInputCol("text") \ - .setOutputCol("document") - sentence_detector = SentenceDetector() \ - .setInputCols(["document"]) \ - .setOutputCol("sentence") - tokenizer = Tokenizer() \ - .setInputCols(["sentence"]) \ - .setOutputCol("token") - - sentence_embeddings = RoBertaSentenceEmbeddings.pretrained() \ - .setInputCols(["document"]) \ - .setOutputCol("sentence_embeddings") - - # TODO add document_similarity_ranker with input col embeddings too - document_similarity_ranker = DocumentSimilarityRankerApproach() \ - .setInputCols("sentence_embeddings") \ - .setOutputCol("doc_similarity_rankings") \ - .setSimilarityMethod("brp") \ - .setNumberOfNeighbours(10) \ - .setBucketLength(2.0) \ - .setNumHashTables(3) \ - .setVisibleDistances(True) \ - .setIdentityRanking(False) - - pipeline = Pipeline(stages=[ - 
document_assembler, - sentence_detector, - tokenizer, - sentence_embeddings, - document_similarity_ranker - # TODO add document_similarity_ranker_finisher - ]) - - model = pipeline.fit(self.data) - # TODO add write/read pipeline - model.transform(self.data).show() - def runTest(self): document_assembler = DocumentAssembler() \ .setInputCol("text") \ @@ -101,7 +60,7 @@ def runTest(self): .setBucketLength(2.0) \ .setNumHashTables(3) \ .setVisibleDistances(True) \ - .setIdentityRanking(False) + .setIdentityRanking(True) document_similarity_ranker_finisher = DocumentSimilarityRankerFinisher() \ .setInputCols("doc_similarity_rankings") \ @@ -120,62 +79,12 @@ def runTest(self): ]) model = pipeline.fit(self.data) - transformed = model.transform(self.data) - transformed.show() - # FIXME encoding on GloVe generates different embeddings length - # def runTest(self): - # document_assembler = DocumentAssembler() \ - # .setInputCol("text") \ - # .setOutputCol("document") - # sentence_detector = SentenceDetector() \ - # .setInputCols(["document"]) \ - # .setOutputCol("sentence") - # tokenizer = Tokenizer() \ - # .setInputCols(["sentence"]) \ - # .setOutputCol("token") - # - # glove = WordEmbeddingsModel.pretrained() \ - # .setInputCols(["sentence", "token"]) \ - # .setOutputCol("embeddings") - # - # sentence_embeddings = SentenceEmbeddings() \ - # .setInputCols(["sentence", "embeddings"]) \ - # .setOutputCol("sentence_embeddings") \ - # .setPoolingStrategy("AVERAGE") - # - # document_similarity_ranker = DocumentSimilarityRankerApproach() \ - # .setInputCols("sentence_embeddings") \ - # .setOutputCol("doc_similarity_rankings") \ - # .setSimilarityMethod("brp") \ - # .setNumberOfNeighbours(10) \ - # .setBucketLength(2.0) \ - # .setNumHashTables(3) \ - # .setVisibleDistances(True) \ - # .setIdentityRanking(True) - # - # print(document_similarity_ranker.__dict__) - # - # # documentSimilarityFinisher = ( - # # DocumentSimilarityRankerFinisher() - # # 
.setInputCols("doc_similarity_rankings") - # # .setOutputCols( - # # "finished_doc_similarity_rankings_id", - # # "finished_doc_similarity_rankings_neighbors") - # # .setExtractNearestNeighbor(True) - # # ) - # - # pipeline = Pipeline(stages=[ - # document_assembler, - # sentence_detector, - # tokenizer, - # glove, - # sentence_embeddings, - # document_similarity_ranker - # ]) - # - # model = pipeline.fit(self.data) - # # model.write().overwrite().save("./tmp_model") - # # loaded_model = model.load("./tmp_model") - # # loaded_model.transform(self.data).show() - # model.transform(self.data).show() + ( + model + .transform(self.data) + .select("text", + "finished_doc_similarity_rankings_id", + "finished_doc_similarity_rankings_neighbors") + .show(10, False) + ) \ No newline at end of file diff --git a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala index 2775d2c407f6cd..3aeb7ccb9dd29b 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/finisher/DocumentSimilarityRankerFinisher.scala @@ -139,7 +139,9 @@ case class DocumentSimilarityRankerFinisher(override val uid: String) .withColumn( s"split_$neighborsColName", split(col(s"no_rounded_$neighborsColName"), ",")) - .withColumn("nearest_neighbor_id", element_at(col(s"split_$neighborsColName"), 1).cast(IntegerType)) + .withColumn( + "nearest_neighbor_id", + element_at(col(s"split_$neighborsColName"), 1).cast(IntegerType)) .withColumn("nearest_neighbor_distance", element_at(col(s"split_$neighborsColName"), 2)) else formatted diff --git a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala index 415eb9895c1999..ccdd8294db6471 100644 --- 
a/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala +++ b/src/test/scala/com/johnsnowlabs/nlp/similarity/DocumentSimilarityRankerTestSpec.scala @@ -5,14 +5,14 @@ import com.johnsnowlabs.nlp.annotators.Tokenizer import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector import com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRankerApproach import com.johnsnowlabs.nlp.base.DocumentAssembler -import com.johnsnowlabs.nlp.embeddings.SentenceEmbeddings +import com.johnsnowlabs.nlp.embeddings.{AlbertEmbeddings, SentenceEmbeddings} import com.johnsnowlabs.nlp.finisher.DocumentSimilarityRankerFinisher import com.johnsnowlabs.nlp.util.io.ResourceHelper import com.johnsnowlabs.nlp.{AnnotatorBuilder, EmbeddingsFinisher} import com.johnsnowlabs.tags.SlowTest import org.apache.spark.ml.{Pipeline, PipelineModel} -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{SparkSession, functions} +import org.apache.spark.sql.functions.{col, element_at, size} import org.scalatest.flatspec.AnyFlatSpec class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { @@ -46,11 +46,10 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setInputCols(Array("document")) .setOutputCol("token") - val embeddings = AnnotatorBuilder - .getGLoveEmbeddings(smallCorpus) - .setInputCols("document", "token") + val embeddings = AlbertEmbeddings + .pretrained() + .setInputCols("sentence", "token") .setOutputCol("embeddings") - .setCaseSensitive(false) val embeddingsSentence = new SentenceEmbeddings() .setInputCols(Array("document", "embeddings")) @@ -68,7 +67,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setSimilarityMethod("brp") .setNumberOfNeighbours(3) .setVisibleDistances(true) - .setIdentityRanking(false) + .setIdentityRanking(true) val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() .setInputCols("doc_similarity_rankings") @@ -97,18 
+96,10 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val transformed = pipelineModel.transform(smallCorpus) - // transformed.printSchema - transformed -// .select( -// "text", -// // "finished_sentence_embeddings", -// "finished_doc_similarity_rankings_id", -// "nearest_neighbor_id", -// "nearest_neighbor_distance") - .show(false) + transformed.select("text", "finished_sentence_embeddings").show() // correct if not empty as inclusive query points are at distance 0.0 from themselves -// assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) + assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) } "DocumentSimilarityRanker" should "should use min hash to rank document similarity" taggedAs SlowTest in { @@ -139,11 +130,10 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setInputCols(Array("document")) .setOutputCol("token") - val embeddings = AnnotatorBuilder - .getGLoveEmbeddings(smallCorpus) - .setInputCols("document", "token") + val embeddings = AlbertEmbeddings + .pretrained() + .setInputCols("sentence", "token") .setOutputCol("embeddings") - .setCaseSensitive(false) val embeddingsSentence = new SentenceEmbeddings() .setInputCols(Array("document", "embeddings")) @@ -161,7 +151,7 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { .setSimilarityMethod("mh") .setNumberOfNeighbours(3) .setVisibleDistances(true) - .setIdentityRanking(false) + .setIdentityRanking(true) val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() .setInputCols("doc_similarity_rankings") @@ -190,17 +180,96 @@ class DocumentSimilarityRankerTestSpec extends AnyFlatSpec { val transformed = pipelineModel.transform(smallCorpus) - // transformed.printSchema - transformed - .select( - "text", - // "finished_sentence_embeddings", - "finished_doc_similarity_rankings_id", - "nearest_neighbor_id", - "nearest_neighbor_distance") - .show(false) - // correct if not empty 
as inclusive query points are at distance 0.0 from themselves assert(!transformed.where(col("nearest_neighbor_distance") === 0.0).rdd.isEmpty() == true) } + + "Databricks pipeline" should "should use min hash to rank document similarity" taggedAs SlowTest in { + import com.johnsnowlabs.nlp.AnnotatorType.DOC_SIMILARITY_RANKINGS + import com.johnsnowlabs.nlp.annotators.Tokenizer + import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector + import com.johnsnowlabs.nlp.annotators.similarity.DocumentSimilarityRankerApproach + import com.johnsnowlabs.nlp.base.DocumentAssembler + import com.johnsnowlabs.nlp.embeddings.{AlbertEmbeddings, SentenceEmbeddings} + import com.johnsnowlabs.nlp.finisher.DocumentSimilarityRankerFinisher + import com.johnsnowlabs.nlp.util.io.ResourceHelper + import com.johnsnowlabs.nlp.EmbeddingsFinisher + import org.apache.spark.ml.{Pipeline, PipelineModel} + + val smallCorpus = spark + .createDataFrame( + List( + "First document, this is my first sentence. This is my second sentence.", + "Second document, this is my second sentence. 
This is my second sentence.", + "Third document, climate change is arguably one of the most pressing problems of our time.", + "Fourth document, climate change is definitely one of the most pressing problems of our time.", + "Fifth document, Florence in Italy, is among the most beautiful cities in Europe.", + "Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.", + "Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.", + "Eighth document, the warmest place in France is the French Riviera coast in Southern France.") + .map(Tuple1(_))) + .toDF("text") + + val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + val sentence = new SentenceDetector() + .setInputCols("document") + .setOutputCol("sentence") + + val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + + val embeddings = AlbertEmbeddings + .pretrained() + .setInputCols("sentence", "token") + .setOutputCol("embeddings") + + val embeddingsSentence = new SentenceEmbeddings() + .setInputCols(Array("document", "embeddings")) + .setOutputCol("sentence_embeddings") + .setPoolingStrategy("AVERAGE") + + val sentenceFinisher = new EmbeddingsFinisher() + .setInputCols("sentence_embeddings") + .setOutputCols("finished_sentence_embeddings") + .setCleanAnnotations(false) + + val docSimilarityRanker = new DocumentSimilarityRankerApproach() + .setInputCols("sentence_embeddings") + .setOutputCol(DOC_SIMILARITY_RANKINGS) + .setSimilarityMethod("brp") + .setNumberOfNeighbours(3) + .setVisibleDistances(true) + .setIdentityRanking(true) + + val documentSimilarityFinisher = new DocumentSimilarityRankerFinisher() + .setInputCols("doc_similarity_rankings") + .setOutputCols( + "finished_doc_similarity_rankings_id", + "finished_doc_similarity_rankings_neighbors") + .setExtractNearestNeighbor(true) + + val pipeline = new Pipeline() + .setStages( + Array( + 
documentAssembler, + sentence, + tokenizer, + embeddings, + embeddingsSentence, + sentenceFinisher, + docSimilarityRanker, + documentSimilarityFinisher)) + + val transformed = pipeline.fit(smallCorpus).transform(smallCorpus) + + transformed + .select("text", "sentence_embeddings.embeddings") + .withColumn("extracted_embeddings", element_at(col("embeddings"), 1)) + .withColumn("embeddings_size", size(col("extracted_embeddings"))) + .show(10, false) + } } From d8f4ed9c6472165430d6cecb92cd37b5ee9e571a Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 1 Jul 2023 22:02:31 +0200 Subject: [PATCH 25/26] Moved and enriched test for doc sim ranker --- .gitignore | 1 + .../doc-sim-ranker/test_doc_sim_ranker.ipynb | 542 ++++++++++++++++++ python/sparknlp/lib/test_doc_sim_ranker.ipynb | 223 ------- 3 files changed, 543 insertions(+), 223 deletions(-) create mode 100644 examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb delete mode 100644 python/sparknlp/lib/test_doc_sim_ranker.ipynb diff --git a/.gitignore b/.gitignore index aa264460d82d79..e91a8952f7c8d5 100644 --- a/.gitignore +++ b/.gitignore @@ -338,3 +338,4 @@ python/docs/reference/_autosummary/** # MS Visio Code **/.vscode/ +.metals/ \ No newline at end of file diff --git a/examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb b/examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb new file mode 100644 index 00000000000000..121018b6eb2a9f --- /dev/null +++ b/examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "c3dc7ce5", + "metadata": {}, + "source": [ + "# Document Similarity Ranker for Spark NLP\n", + "### Efficient approximate nearest neighbor search on top of sentence embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": 
"1a9dd32e", + "metadata": {}, + "outputs": [], + "source": [ + "# Import Spark NLP classes\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "from sparknlp.pretrained import PretrainedPipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "82846deb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ":: loading settings :: url = jar:file:/Users/stefanolori/opt/anaconda3/envs/spknlp/lib/python3.8/site-packages/pyspark/jars/ivy-2.5.0.jar!/org/apache/ivy/core/settings/ivysettings.xml\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Ivy Default Cache set to: /Users/stefanolori/.ivy2/cache\n", + "The jars for the packages stored in: /Users/stefanolori/.ivy2/jars\n", + "com.johnsnowlabs.nlp#spark-nlp_2.12 added as a dependency\n", + ":: resolving dependencies :: org.apache.spark#spark-submit-parent-d858c4fe-292f-4adf-8944-9ebef53c59cd;1.0\n", + "\tconfs: [default]\n", + "\tfound com.johnsnowlabs.nlp#spark-nlp_2.12;4.4.4 in local-ivy-cache\n", + "\tfound com.typesafe#config;1.4.2 in local-m2-cache\n", + "\tfound org.rocksdb#rocksdbjni;6.29.5 in central\n", + "\tfound com.amazonaws#aws-java-sdk-bundle;1.11.828 in central\n", + "\tfound com.github.universal-automata#liblevenshtein;3.0.0 in central\n", + "\tfound com.google.protobuf#protobuf-java-util;3.0.0-beta-3 in central\n", + "\tfound com.google.protobuf#protobuf-java;3.0.0-beta-3 in central\n", + "\tfound com.google.code.gson#gson;2.3 in central\n", + "\tfound it.unimi.dsi#fastutil;7.0.12 in central\n", + "\tfound org.projectlombok#lombok;1.16.8 in central\n", + "\tfound com.google.cloud#google-cloud-storage;2.16.0 in central\n", + "\tfound com.google.guava#guava;31.1-jre in central\n", + "\tfound com.google.guava#failureaccess;1.0.1 in central\n", + "\tfound com.google.guava#listenablefuture;9999.0-empty-to-avoid-conflict-with-guava in central\n", + "\tfound 
com.google.errorprone#error_prone_annotations;2.16 in central\n", + "\tfound com.google.j2objc#j2objc-annotations;1.3 in central\n", + "\tfound com.google.http-client#google-http-client;1.42.3 in central\n", + "\tfound io.opencensus#opencensus-contrib-http-util;0.31.1 in central\n", + "\tfound com.google.http-client#google-http-client-jackson2;1.42.3 in central\n", + "\tfound com.google.http-client#google-http-client-gson;1.42.3 in central\n", + "\tfound com.google.api-client#google-api-client;2.1.1 in central\n", + "\tfound commons-codec#commons-codec;1.15 in central\n", + "\tfound com.google.oauth-client#google-oauth-client;1.34.1 in central\n", + "\tfound com.google.http-client#google-http-client-apache-v2;1.42.3 in central\n", + "\tfound com.google.apis#google-api-services-storage;v1-rev20220705-2.0.0 in central\n", + "\tfound com.google.code.gson#gson;2.10 in central\n", + "\tfound com.google.cloud#google-cloud-core;2.9.0 in central\n", + "\tfound com.google.auto.value#auto-value-annotations;1.10.1 in central\n", + "\tfound com.google.cloud#google-cloud-core-http;2.9.0 in central\n", + "\tfound com.google.http-client#google-http-client-appengine;1.42.3 in central\n", + "\tfound com.google.api#gax-httpjson;0.105.1 in central\n", + "\tfound com.google.cloud#google-cloud-core-grpc;2.9.0 in central\n", + "\tfound io.grpc#grpc-core;1.51.0 in central\n", + "\tfound com.google.api#gax;2.20.1 in central\n", + "\tfound com.google.api#gax-grpc;2.20.1 in central\n", + "\tfound io.grpc#grpc-alts;1.51.0 in central\n", + "\tfound io.grpc#grpc-grpclb;1.51.0 in central\n", + "\tfound org.conscrypt#conscrypt-openjdk-uber;2.5.2 in central\n", + "\tfound io.grpc#grpc-protobuf;1.51.0 in central\n", + "\tfound com.google.auth#google-auth-library-credentials;1.13.0 in central\n", + "\tfound com.google.auth#google-auth-library-oauth2-http;1.13.0 in central\n", + "\tfound com.google.api#api-common;2.2.2 in central\n", + "\tfound javax.annotation#javax.annotation-api;1.3.2 in 
local-m2-cache\n", + "\tfound io.opencensus#opencensus-api;0.31.1 in central\n", + "\tfound io.grpc#grpc-context;1.51.0 in central\n", + "\tfound com.google.api.grpc#proto-google-iam-v1;1.6.22 in central\n", + "\tfound com.google.protobuf#protobuf-java;3.21.10 in central\n", + "\tfound com.google.protobuf#protobuf-java-util;3.21.10 in central\n", + "\tfound com.google.api.grpc#proto-google-common-protos;2.11.0 in central\n", + "\tfound org.threeten#threetenbp;1.6.4 in central\n", + "\tfound com.google.api.grpc#proto-google-cloud-storage-v2;2.16.0-alpha in central\n", + "\tfound com.google.api.grpc#grpc-google-cloud-storage-v2;2.16.0-alpha in central\n", + "\tfound com.google.api.grpc#gapic-google-cloud-storage-v2;2.16.0-alpha in central\n", + "\tfound com.fasterxml.jackson.core#jackson-core;2.14.1 in central\n", + "\tfound com.google.code.findbugs#jsr305;3.0.2 in central\n", + "\tfound io.grpc#grpc-api;1.51.0 in central\n", + "\tfound io.grpc#grpc-auth;1.51.0 in central\n", + "\tfound io.grpc#grpc-stub;1.51.0 in central\n", + "\tfound org.checkerframework#checker-qual;3.28.0 in central\n", + "\tfound com.google.api.grpc#grpc-google-iam-v1;1.6.22 in central\n", + "\tfound io.grpc#grpc-protobuf-lite;1.51.0 in central\n", + "\tfound com.google.android#annotations;4.1.1.4 in central\n", + "\tfound org.codehaus.mojo#animal-sniffer-annotations;1.22 in central\n", + "\tfound io.grpc#grpc-netty-shaded;1.51.0 in central\n", + "\tfound io.perfmark#perfmark-api;0.26.0 in central\n", + "\tfound io.grpc#grpc-googleapis;1.51.0 in central\n", + "\tfound io.grpc#grpc-xds;1.51.0 in central\n", + "\tfound io.opencensus#opencensus-proto;0.2.0 in central\n", + "\tfound io.grpc#grpc-services;1.51.0 in central\n", + "\tfound com.google.re2j#re2j;1.6 in central\n", + "\tfound com.navigamez#greex;1.0 in central\n", + "\tfound dk.brics.automaton#automaton;1.11-8 in central\n", + "\tfound com.johnsnowlabs.nlp#tensorflow-cpu_2.12;0.4.4 in central\n", + ":: resolution report :: resolve 1092ms 
:: artifacts dl 43ms\n", + "\t:: modules in use:\n", + "\tcom.amazonaws#aws-java-sdk-bundle;1.11.828 from central in [default]\n", + "\tcom.fasterxml.jackson.core#jackson-core;2.14.1 from central in [default]\n", + "\tcom.github.universal-automata#liblevenshtein;3.0.0 from central in [default]\n", + "\tcom.google.android#annotations;4.1.1.4 from central in [default]\n", + "\tcom.google.api#api-common;2.2.2 from central in [default]\n", + "\tcom.google.api#gax;2.20.1 from central in [default]\n", + "\tcom.google.api#gax-grpc;2.20.1 from central in [default]\n", + "\tcom.google.api#gax-httpjson;0.105.1 from central in [default]\n", + "\tcom.google.api-client#google-api-client;2.1.1 from central in [default]\n", + "\tcom.google.api.grpc#gapic-google-cloud-storage-v2;2.16.0-alpha from central in [default]\n", + "\tcom.google.api.grpc#grpc-google-cloud-storage-v2;2.16.0-alpha from central in [default]\n", + "\tcom.google.api.grpc#grpc-google-iam-v1;1.6.22 from central in [default]\n", + "\tcom.google.api.grpc#proto-google-cloud-storage-v2;2.16.0-alpha from central in [default]\n", + "\tcom.google.api.grpc#proto-google-common-protos;2.11.0 from central in [default]\n", + "\tcom.google.api.grpc#proto-google-iam-v1;1.6.22 from central in [default]\n", + "\tcom.google.apis#google-api-services-storage;v1-rev20220705-2.0.0 from central in [default]\n", + "\tcom.google.auth#google-auth-library-credentials;1.13.0 from central in [default]\n", + "\tcom.google.auth#google-auth-library-oauth2-http;1.13.0 from central in [default]\n", + "\tcom.google.auto.value#auto-value-annotations;1.10.1 from central in [default]\n", + "\tcom.google.cloud#google-cloud-core;2.9.0 from central in [default]\n", + "\tcom.google.cloud#google-cloud-core-grpc;2.9.0 from central in [default]\n", + "\tcom.google.cloud#google-cloud-core-http;2.9.0 from central in [default]\n", + "\tcom.google.cloud#google-cloud-storage;2.16.0 from central in [default]\n", + "\tcom.google.code.findbugs#jsr305;3.0.2 from 
central in [default]\n", + "\tcom.google.code.gson#gson;2.10 from central in [default]\n", + "\tcom.google.errorprone#error_prone_annotations;2.16 from central in [default]\n", + "\tcom.google.guava#failureaccess;1.0.1 from central in [default]\n", + "\tcom.google.guava#guava;31.1-jre from central in [default]\n", + "\tcom.google.guava#listenablefuture;9999.0-empty-to-avoid-conflict-with-guava from central in [default]\n", + "\tcom.google.http-client#google-http-client;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-apache-v2;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-appengine;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-gson;1.42.3 from central in [default]\n", + "\tcom.google.http-client#google-http-client-jackson2;1.42.3 from central in [default]\n", + "\tcom.google.j2objc#j2objc-annotations;1.3 from central in [default]\n", + "\tcom.google.oauth-client#google-oauth-client;1.34.1 from central in [default]\n", + "\tcom.google.protobuf#protobuf-java;3.21.10 from central in [default]\n", + "\tcom.google.protobuf#protobuf-java-util;3.21.10 from central in [default]\n", + "\tcom.google.re2j#re2j;1.6 from central in [default]\n", + "\tcom.johnsnowlabs.nlp#spark-nlp_2.12;4.4.4 from local-ivy-cache in [default]\n", + "\tcom.johnsnowlabs.nlp#tensorflow-cpu_2.12;0.4.4 from central in [default]\n", + "\tcom.navigamez#greex;1.0 from central in [default]\n", + "\tcom.typesafe#config;1.4.2 from local-m2-cache in [default]\n", + "\tcommons-codec#commons-codec;1.15 from central in [default]\n", + "\tdk.brics.automaton#automaton;1.11-8 from central in [default]\n", + "\tio.grpc#grpc-alts;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-api;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-auth;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-context;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-core;1.51.0 from central in [default]\n", + 
"\tio.grpc#grpc-googleapis;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-grpclb;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-netty-shaded;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-protobuf;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-protobuf-lite;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-services;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-stub;1.51.0 from central in [default]\n", + "\tio.grpc#grpc-xds;1.51.0 from central in [default]\n", + "\tio.opencensus#opencensus-api;0.31.1 from central in [default]\n", + "\tio.opencensus#opencensus-contrib-http-util;0.31.1 from central in [default]\n", + "\tio.opencensus#opencensus-proto;0.2.0 from central in [default]\n", + "\tio.perfmark#perfmark-api;0.26.0 from central in [default]\n", + "\tit.unimi.dsi#fastutil;7.0.12 from central in [default]\n", + "\tjavax.annotation#javax.annotation-api;1.3.2 from local-m2-cache in [default]\n", + "\torg.checkerframework#checker-qual;3.28.0 from central in [default]\n", + "\torg.codehaus.mojo#animal-sniffer-annotations;1.22 from central in [default]\n", + "\torg.conscrypt#conscrypt-openjdk-uber;2.5.2 from central in [default]\n", + "\torg.projectlombok#lombok;1.16.8 from central in [default]\n", + "\torg.rocksdb#rocksdbjni;6.29.5 from central in [default]\n", + "\torg.threeten#threetenbp;1.6.4 from central in [default]\n", + "\t:: evicted modules:\n", + "\tcom.google.protobuf#protobuf-java-util;3.0.0-beta-3 by [com.google.protobuf#protobuf-java-util;3.21.10] in [default]\n", + "\tcom.google.protobuf#protobuf-java;3.0.0-beta-3 by [com.google.protobuf#protobuf-java;3.21.10] in [default]\n", + "\tcom.google.code.gson#gson;2.3 by [com.google.code.gson#gson;2.10] in [default]\n", + "\t---------------------------------------------------------------------\n", + "\t| | modules || artifacts |\n", + "\t| conf | number| search|dwnlded|evicted|| number|dwnlded|\n", + 
"\t---------------------------------------------------------------------\n", + "\t| default | 73 | 0 | 0 | 3 || 70 | 0 |\n", + "\t---------------------------------------------------------------------\n", + ":: retrieving :: org.apache.spark#spark-submit-parent-d858c4fe-292f-4adf-8944-9ebef53c59cd\n", + "\tconfs: [default]\n", + "\t0 artifacts copied, 70 already retrieved (0kB/16ms)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "23/07/01 22:00:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n" + ] + } + ], + "source": [ + "# Create the PySpark session\n", + "from pyspark.sql import SparkSession\n", + "\n", + "spark = SparkSession.builder \\\n", + " .appName(\"Spark NLP\")\\\n", + " .master(\"local[*]\")\\\n", + " .config(\"spark.driver.memory\",\"16G\")\\\n", + " .config(\"spark.driver.maxResultSize\", \"0\") \\\n", + " .config(\"spark.kryoserializer.buffer.max\", \"2000M\")\\\n", + " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4\")\\\n", + " .getOrCreate()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a3f563d5", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's use some dataset where we can visually control similarity\n", + "# Documents are coupled, as 1-2, 3-4, 5-6, 7-8 and they were voluntarily created similar\n", + "data = spark.createDataFrame(\n", + " [\n", + " [\"First document, this is my first sentence. This is my second sentence.\"],\n", + " [\"Second document, this is my second sentence. 
This is my second sentence.\"],\n", + " [\"Third document, climate change is arguably one of the most pressing problems of our time.\"],\n", + " [\"Fourth document, climate change is definitely one of the most pressing problems of our time.\"],\n", + " [\"Fifth document, Florence in Italy, is among the most beautiful cities in Europe.\"],\n", + " [\"Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.\"],\n", + " [\"Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.\"],\n", + " [\"Eighth document, the warmest place in France is the French Riviera coast in Southern France.\"]\n", + " ]\n", + " ).toDF(\"text\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "34604126", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 0:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------------------------------------------------------------------------------+\n", + "|text |\n", + "+------------------------------------------------------------------------------------------------------+\n", + "|First document, this is my first sentence. This is my second sentence. |\n", + "|Second document, this is my second sentence. This is my second sentence. |\n", + "|Third document, climate change is arguably one of the most pressing problems of our time. |\n", + "|Fourth document, climate change is definitely one of the most pressing problems of our time. |\n", + "|Fifth document, Florence in Italy, is among the most beautiful cities in Europe. |\n", + "|Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France. |\n", + "|Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.|\n", + "|Eighth document, the warmest place in France is the French Riviera coast in Southern France. 
|\n", + "+------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "data.show(10, False)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "945e787d", + "metadata": {}, + "source": [ + "## A document similarity ranker pipeline\n", + "### The document similarity ranker works downstream of other annotators generating sentence embeddings. In this example we'll use RoBertaSentenceEmbeddings.\n", + "The pipeline will use the following steps:\n", + "- document_assembler to annotate the documents\n", + "- sentence_detector to detect sentences\n", + "- tokenizer to apply tokenization\n", + "- sentence_embeddings to create the necessary sentence embeddings representation\n", + "- document_similarity_ranker to extract the similar documents via annotator configuration\n", + "- document_similarity_ranker_finisher to extract the column of interest for this new annotator" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d4d2bd1d", + "metadata": {}, + "source": [ + "## DocumentSimilarityRankerApproach: input parameter setters overview\n", + "- setInputCols(\"sentence_embeddings\") : this setter will address input column\n", + "- setOutputCol(\"doc_similarity_rankings\") : this setter will address output column\n", + "- setSimilarityMethod(\"brp\") : this setter will select the LSH method (brp|mh) used to apply approximate nearest neighbours search\n", + "- setNumberOfNeighbours(10) : this setter will address the desired number of similar documents for a given document in the set\n", + "- setBucketLength(2.0) : LSH parameter used to control the average size of hash buckets and improve recall\n", + "- setNumHashTables(3) : LSH parameter used to control number of hash tables used in LSH OR-amplification and improve recall\n", + "- setVisibleDistances(True) : this setter will make 
distances visible in the result, useful for debug-level information\n", + "- setIdentityRanking(False) : this setter will make identity distance (0.0) visible, useful for debug-level information" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "0b36d5cd", + "metadata": {}, + "source": [ + "## DocumentSimilarityRankerFinisher: output parameters overview\n", + "- setInputCols(\"doc_similarity_rankings\") : this setter will read the result column to extract IDs and distances\n", + "- setOutputCols(\n", + " \"finished_doc_similarity_rankings_id\",\n", + " \"finished_doc_similarity_rankings_neighbors\") : this setter selects the columns holding the query document ID and the neighboring documents returned by the search run" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9a8f9eae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sent_roberta_base download started this may take some time.\n", + "Approximate size to download 284.8 MB\n", + "[ | ]sent_roberta_base download started this may take some time.\n", + "Approximate size to download 284.8 MB\n", + "Download done! 
Loading the resource.\n", + "[ / ]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-01 22:01:11.233544: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ \\ ]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: An illegal reflective access operation has occurred\n", + "WARNING: Illegal reflective access by org.apache.spark.util.SizeEstimator$ (file:/Users/stefanolori/opt/anaconda3/envs/spknlp/lib/python3.8/site-packages/pyspark/jars/spark-core_2.12-3.3.1.jar) to field java.lang.ref.Reference.referent\n", + "WARNING: Please consider reporting this to the maintainers of org.apache.spark.util.SizeEstimator$\n", + "WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations\n", + "WARNING: All illegal access operations will be denied in a future release\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[OK!]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "23/07/01 22:01:22 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS\n", + "23/07/01 22:01:22 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.ForeignLinkerBLAS\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------+------------------------------------------+\n", + 
"|finished_doc_similarity_rankings_id|finished_doc_similarity_rankings_neighbors|\n", + "+-----------------------------------+------------------------------------------+\n", + "|1510101612 |[(1634839239,0.12448559273510636)] |\n", + "|1634839239 |[(1510101612,0.12448559273510636)] |\n", + "|-612640902 |[(1274183715,0.12201215887654807)] |\n", + "|1274183715 |[(-612640902,0.12201215887654807)] |\n", + "|-1320876223 |[(1293373212,0.17848861258809434)] |\n", + "|1293373212 |[(-1320876223,0.17848861258809434)] |\n", + "|-1548374770 |[(-1719102856,0.2329717161223739)] |\n", + "|-1719102856 |[(-1548374770,0.2329717161223739)] |\n", + "+-----------------------------------+------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "from sparknlp.annotator.similarity.document_similarity_ranker import *\n", + "\n", + "document_assembler = DocumentAssembler() \\\n", + " .setInputCol(\"text\") \\\n", + " .setOutputCol(\"document\")\n", + "sentence_detector = SentenceDetector() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence\")\n", + "tokenizer = Tokenizer() \\\n", + " .setInputCols([\"sentence\"]) \\\n", + " .setOutputCol(\"token\")\n", + "\n", + "sentence_embeddings = RoBertaSentenceEmbeddings.pretrained() \\\n", + " .setInputCols([\"document\"]) \\\n", + " .setOutputCol(\"sentence_embeddings\")\n", + "\n", + "document_similarity_ranker = DocumentSimilarityRankerApproach() \\\n", + " .setInputCols(\"sentence_embeddings\") \\\n", + " .setOutputCol(\"doc_similarity_rankings\") \\\n", + " .setSimilarityMethod(\"brp\") \\\n", + " .setNumberOfNeighbours(1) \\\n", + " .setBucketLength(2.0) \\\n", + " .setNumHashTables(3) \\\n", + " .setVisibleDistances(True) \\\n", + " .setIdentityRanking(False)\n", + "\n", + "document_similarity_ranker_finisher = DocumentSimilarityRankerFinisher() \\\n", + " .setInputCols(\"doc_similarity_rankings\") \\\n", + " .setOutputCols(\n", + " \"finished_doc_similarity_rankings_id\",\n", + " 
\"finished_doc_similarity_rankings_neighbors\") \\\n", + " .setExtractNearestNeighbor(True)\n", + "\n", + "pipeline = Pipeline(stages=[\n", + " document_assembler,\n", + " sentence_detector,\n", + " tokenizer,\n", + " sentence_embeddings,\n", + " document_similarity_ranker,\n", + " document_similarity_ranker_finisher\n", + " ])\n", + "\n", + "docSimRankerPipeline = pipeline.fit(data).transform(data)\n", + "# TODO add write/read pipeline\n", + "(\n", + " docSimRankerPipeline\n", + " .select(\n", + " \"finished_doc_similarity_rankings_id\",\n", + " \"finished_doc_similarity_rankings_neighbors\"\n", + " ).show(10, False)\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "54eca293", + "metadata": {}, + "source": [ + "## Result analysis for consistent result confirmation\n", + "#### The test confirms the initial hypothesis: the documents were deliberately created as similar pairs: 1-2, 3-4, 5-6, 7-8.\n", + "For instance, documents 1 and 2 are detected as each other's nearest neighbor, at the very same distance:\n", + "- document ID 1510101612 has its most similar document in (1634839239,0.12448559273510636) at distance 0.12448559273510636\n", + "- document ID 1634839239 has its most similar document in (1510101612,0.12448559273510636) at distance 0.12448559273510636\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cde88af", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/python/sparknlp/lib/test_doc_sim_ranker.ipynb b/python/sparknlp/lib/test_doc_sim_ranker.ipynb deleted file mode 
100644 index 4aa8694a0e5daa..00000000000000 --- a/python/sparknlp/lib/test_doc_sim_ranker.ipynb +++ /dev/null @@ -1,223 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "1a9dd32e", - "metadata": {}, - "outputs": [], - "source": [ - "# Import Spark NLP\n", - "from sparknlp.base import *\n", - "from sparknlp.annotator import *\n", - "from sparknlp.pretrained import PretrainedPipeline\n", - "import sparknlp\n", - "from pyspark.sql import SparkSession" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "a3f563d5", - "metadata": {}, - "outputs": [], - "source": [ - "data = spark.createDataFrame([\n", - " [\"First document, this is my first sentence. This is my second sentence.\"],\n", - " [\"Second document, this is my second sentence. This is my second sentence.\"],\n", - " [\"Third document, climate change is arguably one of the most pressing problems of our time.\"],\n", - " [\"Fourth document, climate change is definitely one of the most pressing problems of our time.\"],\n", - " [\"Fifth document, Florence in Italy, is among the most beautiful cities in Europe.\"],\n", - " [\"Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France.\"],\n", - " [\"Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.\"],\n", - " [\"Eighth document, the warmest place in France is the French Riviera coast in Southern France.\"]\n", - " ]).toDF(\"text\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "34604126", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 0:> (0 + 1) / 1]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+\n", - "| text|\n", - "+--------------------+\n", - "|First document, t...|\n", - "|Second document, ...|\n", - "|Third document, c...|\n", - "|Fourth document, ...|\n", - "|Fifth document, F...|\n", - 
"|Sixth document, F...|\n", - "|Seventh document,...|\n", - "|Eighth document, ...|\n", - "+--------------------+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r", - " \r" - ] - } - ], - "source": [ - "data.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "9a8f9eae", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sent_roberta_base download started this may take some time.\n", - "Approximate size to download 284.8 MB\n", - "[ — ]sent_roberta_base download started this may take some time.\n", - "Approximate size to download 284.8 MB\n", - "Download done! Loading the resource.\n", - "[ — ]" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-04-20 23:24:27.498674: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[OK!]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23/04/20 23:24:36 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS\n", - "23/04/20 23:24:36 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.ForeignLinkerBLAS\n", - 
"+------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+-------------------------+\n", - "|text |document |sentence |token |sentence_embeddings |doc_similarity_rankings |finished_doc_similarity_rankings_id|finished_doc_similarity_rankings_neighbors |nearest_neighbor_id|nearest_neighbor_distance|\n", - 
"+------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+-------------------------+\n", - "|First document, this is my first sentence. This is my second sentence. |[{document, 0, 69, First document, this is my first sentence. 
This is my second sentence., {sentence -> 0}, []}] |[{document, 0, 41, First document, this is my first sentence., {sentence -> 0}, []}, {document, 43, 69, This is my second sentence., {sentence -> 1}, []}] |[{token, 0, 4, First, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 19, this, {sentence -> 0}, []}, {token, 21, 22, is, {sentence -> 0}, []}, {token, 24, 25, my, {sentence -> 0}, []}, {token, 27, 31, first, {sentence -> 0}, []}, {token, 33, 40, sentence, {sentence -> 0}, []}, {token, 41, 41, ., {sentence -> 0}, []}, {token, 43, 46, This, {sentence -> 1}, []}, {token, 48, 49, is, {sentence -> 1}, []}, {token, 51, 52, my, {sentence -> 1}, []}, {token, 54, 59, second, {sentence -> 1}, []}, {token, 61, 68, sentence, {sentence -> 1}, []}, {token, 69, 69, ., {sentence -> 1}, []}] |[{sentence_embeddings, 0, 69, First document, this is my first sentence. This is my second sentence., {sentence -> 0, token -> First document, this is my first sentence. 
This is my second sentence., pieceId -> -1, isWordStart -> true}, [-0.0016509573, -0.20525712, -0.21965097, -0.059896577, 0.1287623, 0.19543253, 0.2601918, -0.08692171, -0.105864875, -0.1537919, 0.23202443, 0.018591736, -0.09253034, 0.086080864, -0.13730444, 0.4803275, 0.22649625, -0.4538324, 0.060501292, -0.022498287, -0.2590919, 0.052696917, 0.459304, 0.34466252, 0.096251465, 0.052280027, -0.13888891, -0.019944986, 0.16517194, 0.23654841, 0.28898573, 0.06694212, 0.10338869, 0.25518826, -0.23867318, 0.06869716, -0.3045499, 0.008243265, 0.2485094, -0.18031433, -0.071846046, 0.15547389, 0.20157382, -0.15197659, -0.11142959, 0.40137476, 0.2453684, 0.016406162, -0.09601788, -0.09317206, -0.35614085, 0.32941064, 0.28732747, 0.192841, -0.010993258, 0.03665424, -0.15332705, 0.26033446, -0.09785265, -0.100575626, -0.11137454, -0.20248345, -0.011054744, -0.06887667, 0.010282027, -0.12600632, 0.09104587, -0.1361701, -0.16300207, 0.07745324, -0.07060441, 0.15660061, 0.17127551, -0.31203714, -0.28957927, 0.06316288, -0.5809974, -0.11879944, 0.29057372, 0.44116113, -0.12183485, 0.1956723, 0.05491798, 0.21726313, -0.014335553, -0.042902187, -0.051117186, -0.10438917, 0.17740841, 0.26991013, -0.20177917, -0.39176428, 0.056344096, 0.018784363, -0.08682689, 0.008575063, -0.02186684, -0.06476933, -0.15295796, -0.18002006, 0.07875256, -0.2424122, -0.14407246, 0.2685751, -0.0313854, -0.19538617, -0.015640117, 0.29632342, 0.07008212, -0.09939836, -0.16506532, 0.41788468, 0.29194131, 0.011487283, 0.015623666, 0.16049236, 0.12739733, -0.2970833, 0.42711484, -0.30154955, -0.0042597246, -0.09696268, 0.11135193, 0.13904221, -0.21608506, 0.29018146, 0.16236295, 0.25396034, 0.19324322, 0.08677476, -0.053531177, 0.14517988, -0.11484923, 0.15421726, 0.22026348, 0.11952507, 0.010373934, -0.34372324, -0.23538907, 0.2590132, 0.32607847, 0.18607004, -0.041907027, 0.17356849, 0.11717049, 0.23005699, 0.14421561, -0.40986398, 0.04959026, 0.32919577, 0.11714556, 0.15356015, -0.05093626, -0.27187335, 
-0.2434729, -0.0744803, 0.046752095, -0.31802502, -0.13378103, 0.37520665, 0.04794983, -0.015131961, -0.15734115, -0.2253003, -0.006646415, -0.10654589, 0.03465094, 0.08154701, -0.0929395, -0.3991926, -0.09223294, -0.55187756, -0.13733594, 0.20702904, -0.30563506, 0.24306192, -0.30793938, 0.10908097, 0.37263408, 0.051739924, 0.013253078, -0.18778512, -0.010503004, 0.13689041, 0.33355665, 0.22972348, -0.38842443, 0.09157828, 0.16181372, 0.25097308, 0.12582561, -0.040875636, -0.13416474, 0.14245158, -0.19798084, 0.16056503, -0.20634986, 0.17811936, -0.23963411, -0.20601338, 0.2909662, -0.41837695, -0.043886326, 0.10675116, 0.2580096, 0.012383441, -0.020623814, -0.08108415, 0.11714205, 0.18568753, 0.13704987, -0.38517642, 0.27483293, -0.027029455, -0.029421767, -0.03520134, 0.17171104, 0.25429082, 0.096434996, -0.40192246, -0.15646617, 0.108028576, 0.27889878, -0.22277395, 0.16855176, -0.3042606, -0.3762433, -0.1455701, 0.18953793, 0.21115941, 0.15629233, -0.26154017, 0.18894924, -0.09465889, -0.44113842, -0.36655298, -0.109620884, 0.2422129, 0.16998006, 0.1736859, 0.24283548, 0.023980502, 0.1371494, 0.15047146, 0.19360293, -0.13204347, 0.17984484, -0.36500728, -0.05806427, -0.26840413, -0.19872135, -0.23028538, 0.3935414, -0.19386484, 0.23332444, 0.39288434, -0.30275175, -0.10890826, 0.15745756, 0.10137965, 0.06952018, -0.13416727, 0.21371306, 0.17559311, -0.101447366, 0.23363695, -0.0016540436, 0.27593082, 0.19691755, 0.10001489, 0.14923371, 0.13464274, -0.15578793, 0.05758963, 0.018758772, -0.018384326, -0.24449226, -0.13423687, 0.22088864, -0.0455652, 0.027285956, -0.1665838, -0.107885145, 0.021460643, 0.3850578, -0.35932088, 0.26157242, 0.07409609, 0.14653832, -0.22538042, -0.20356905, 0.065705076, 0.18143511, -0.40029642, -0.014136726, 0.15440395, 0.10932906, 0.21850891, 0.2580729, -0.00988111, -0.120041765, 0.4927309, -0.14106606, -0.12887348, 0.278349, -0.26594374, -0.27160963, 0.2535723, -0.01851703, 0.30484486, 0.14516489, 0.030297654, 0.042642463, 
-0.6132387, 0.07002391, -0.4603895, -0.012375533, 0.022541162, -0.09806742, -0.18119614, 0.13785718, 0.2639673, -0.2530231, -0.02733121, 0.20391333, 0.07542794, -0.09949427, 0.48948997, -0.014352312, 0.21734369, -0.06898155, 0.2560105, -0.1990966, 0.27983844, -0.27486038, -0.113962464, 0.015387479, 0.0917569, 0.05042972, -0.05722061, -0.33860856, 0.20994869, 0.010669914, -0.042475466, -0.06582815, 0.09746496, -0.018288288, 0.05784519, 0.0482757, 0.32611609, 0.20912243, -0.018082418, -0.380203, -0.024771776, -0.10463602, 0.040596716, 0.027981333, -0.033508264, 0.43150952, -0.1027142, 0.014048678, -0.1371664, 0.26183385, 0.2007899, 0.1397189, 0.12691422, 0.092273235, 0.15318273, -0.036203247, -0.0019277359, -0.122955844, -0.23079583, -0.27070555, 0.21413256, -0.22849205, -0.15937866, 0.16445585, 0.2226356, -0.13215786, 0.16300718, 0.3076105, 0.10777309, -0.15043095, 0.25800404, -0.10401015, 0.11895908, 0.31831232, -0.027889991, 0.16663, 0.50905895, 0.20906426, -0.3731108, -0.015852178, -0.21458037, -0.008900974, 0.23457617, -0.13320251, 0.17811044, 0.38757527, 0.31200206, 0.45592153, -0.017489515, -0.10929343, 0.08955372, 0.22110991, 0.021531774, -0.1519175, -0.1773438, 0.2732921, 0.06679491, -0.14536235, -0.021941066, -0.14891659, 0.016744306, -0.12736456, -0.39618, 0.039470803, 0.20796895, -0.4877525, 0.1055665, -0.2754277, 0.04144014, -0.22956206, 0.20604385, -0.22717506, -0.10176536, 0.39214277, -0.08812965, 0.029923324, -0.14861423, -0.15354954, 0.01690382, 0.030926324, -0.04741114, -0.023319794, 0.3530178, -0.11661981, 0.026740639, 0.02915204, 0.20712481, -0.062735416, 0.16828942, 0.027333798, -0.143883, -0.38526252, 0.11417671, -0.20366596, -0.44497097, -0.3797921, 0.36764264, -0.15239948, -0.24930836, -0.22178406, -0.26763546, 0.059928652, 0.18547069, 0.46448106, -0.38581342, -0.0952114, 0.50129086, -0.07868489, -0.16569282, 0.28730562, 0.1784999, -0.3028391, 0.32795244, 0.26607093, -0.04681752, 0.028598929, 0.5256846, 0.11711999, 0.19183147, -0.21013169, 
0.44537762, -0.22686206, 0.31831375, -0.14511606, -0.19815892, -0.22267957, 0.014032256, 0.32726866, 0.20925169, -0.42334914, -0.11525112, 0.03519667, 0.35435578, -0.386533, -0.061628006, -0.00946854, -0.33106926, 0.1413482, 0.08982125, 0.21938783, -0.36983567, -0.013867053, 0.40048254, -0.28595185, 0.12054411, 0.31387663, 0.06425676, 0.36636186, -0.02155459, -0.0055017034, 0.027993213, -0.2304683, -0.014896044, 0.13527954, 0.57070816, 0.15139924, -0.38313928, 0.09329611, 0.24846943, -0.18317448, 0.3021817, -0.09956795, -0.031919435, 0.26749623, -0.03647182, 0.13850948, -0.08908831, -0.23334776, -0.3168528, 0.38110238, -0.19650906, -0.10419602, -0.16122277, -0.10340526, -0.16876405, 0.039111413, -0.3708464, 0.31809127, 0.14100179, -0.18282901, -0.07942389, -0.089590766, -0.14431885, -0.22517395, -0.25504246, 0.4375917, -0.15872298, -0.4546147, 0.26022837, 0.03248598, 0.35889503, 0.048524577, 0.088019766, -0.06272402, 0.15277462, 0.08405457, -0.12085052, 0.2635839, 0.06471835, -0.53696585, -0.12954685, -0.20841905, 0.08119028, 0.204219, -0.3485882, 0.03504887, 0.010403169, 0.14514661, 0.00751219, -0.098806985, -0.07772141, 0.40441778, 0.24756968, 0.3027828, 0.09314904, 0.23755553, -0.031587124, -0.33195436, 0.043194465, 0.08573535, -0.18050656, 0.4158775, -0.07615283, -0.3771687, -0.064762294, 0.40367717, 0.09764434, 0.02901462, -0.04842578, 0.21205749, 0.16108298, -0.14622071, 0.17626472, -0.029647602, -0.13114613, -0.098030314, 0.07949154, -0.21446016, 0.0422025, -0.15453711, -0.0126237925, -0.20044088, 0.0046409313, -0.19496118, 0.2493613, -0.3019857, 0.07960567, 0.0483416, 0.28941196, -0.34885955, -0.16683857, -0.076981924, 0.14302921, 0.262528, 0.34287122, 0.026721897, 0.025925094, -0.17189877, -0.24605455, 0.050659895, -0.21924202, 0.14178936, 0.08741318, 0.2537666, -0.31009108, -0.18676534, 0.2118603, -0.09943448, -0.14400531, 0.39799818, 0.2653751, 0.20913126, 0.012621317, 0.25341883, 0.026780201, -0.1865512, -0.12074198, -0.25787765, 0.073576815, 
-0.08996679, -0.059922904, -0.06613629, -0.11565135, -0.19995238, -0.15129463, 0.1596153, 0.14251925, 0.025685819, -0.05717514, -0.012424833, -0.28294817, 0.32567903, 0.025750922, 0.06216015, -0.05263431, 0.045462035, -0.13553426, 0.23230933, 0.21270712, 0.075220734, -0.18877773, -0.0791943, -0.28871432, -0.35735613, 0.063926004, 0.12319752, 0.108247474, -0.083363935, -0.24347112, -0.004402367, -0.15542541, 0.19257565, 0.022904381, -0.15720096, -0.08451907, -0.04471166, -0.024871575, 0.07304403, -0.19043835, -0.19255094, -0.110913895, -0.07317639, -0.0738659, 0.3432893, -0.05318059, 0.2751972, -0.15310696, -0.0040784623, -0.17644951, 0.10200317, -0.061957322, 0.05185596, 0.2646503, -0.44402647, -0.1641448, -0.030240616, -0.18341458, -0.14352442, -0.082672276, -0.024050146, 0.24113926, -0.36153135, 0.22421445, -0.13683006, 0.19264282, -0.07566274, -0.2577963, -0.14437243, -0.005780896, 0.2361684, -0.34697405, -0.2580383, -0.26091143, -0.09685133, -0.078017235, -0.26482865, 0.41297123, -0.107337065, -0.07583126, 0.007925678, 0.46675873, 0.21356691, 0.16632208, 0.22148071, -0.032334905, 0.03056415, 0.09642622, -0.45709848, 0.24851573, -0.2539892, -0.12904367, 0.016261106, 0.12153378, -0.0012138055, 0.025865352, -0.13303627, -0.09864319, 0.21006866, -0.35695034, -0.045082778, 0.25232777, 0.14546284, -0.24803042, 0.02825165, 0.12093924, 0.37656778, 0.08200147, -0.22424765, 0.10366332, -0.3352094, -0.017510755, -0.19172588, -0.30078256, 0.16272469, -0.06270328, 0.07839331, -0.098105066, -0.30520752, 0.2076339, -0.08514935, -0.06716773, 0.4287407, 0.02735758, -0.13251884, 0.14675677, 0.029392201, 0.032187577, -0.098731965, 0.28921035, 0.23158008, -0.28971958, 0.15396328, -0.14108004, -0.057366837, -0.07640423]}] |[{doc_similarity_rankings, 0, 69, First document, this is my first sentence. This is my second sentence., {pieceId -> -1, lshId -> 1510101612, isWordStart -> true, token -> First document, this is my first sentence. 
This is my second sentence., lshNeighbors -> [(1634839239,0.12448559273510636),(1274183715,0.36788497133113396),(-612640902,0.3851201869623605),(1293373212,0.3979194244143298),(-1320876223,0.3994126224276987),(-1719102856,0.4043799951515284),(-1548374770,0.41026101952006294)], sentence -> 0}, [-0.0016509573, -0.20525712, -0.21965097, -0.059896577, 0.1287623, 0.19543253, 0.2601918, -0.08692171, -0.105864875, -0.1537919, 0.23202443, 0.018591736, -0.09253034, 0.086080864, -0.13730444, 0.4803275, 0.22649625, -0.4538324, 0.060501292, -0.022498287, -0.2590919, 0.052696917, 0.459304, 0.34466252, 0.096251465, 0.052280027, -0.13888891, -0.019944986, 0.16517194, 0.23654841, 0.28898573, 0.06694212, 0.10338869, 0.25518826, -0.23867318, 0.06869716, -0.3045499, 0.008243265, 0.2485094, -0.18031433, -0.071846046, 0.15547389, 0.20157382, -0.15197659, -0.11142959, 0.40137476, 0.2453684, 0.016406162, -0.09601788, -0.09317206, -0.35614085, 0.32941064, 0.28732747, 0.192841, -0.010993258, 0.03665424, -0.15332705, 0.26033446, -0.09785265, -0.100575626, -0.11137454, -0.20248345, -0.011054744, -0.06887667, 0.010282027, -0.12600632, 0.09104587, -0.1361701, -0.16300207, 0.07745324, -0.07060441, 0.15660061, 0.17127551, -0.31203714, -0.28957927, 0.06316288, -0.5809974, -0.11879944, 0.29057372, 0.44116113, -0.12183485, 0.1956723, 0.05491798, 0.21726313, -0.014335553, -0.042902187, -0.051117186, -0.10438917, 0.17740841, 0.26991013, -0.20177917, -0.39176428, 0.056344096, 0.018784363, -0.08682689, 0.008575063, -0.02186684, -0.06476933, -0.15295796, -0.18002006, 0.07875256, -0.2424122, -0.14407246, 0.2685751, -0.0313854, -0.19538617, -0.015640117, 0.29632342, 0.07008212, -0.09939836, -0.16506532, 0.41788468, 0.29194131, 0.011487283, 0.015623666, 0.16049236, 0.12739733, -0.2970833, 0.42711484, -0.30154955, -0.0042597246, -0.09696268, 0.11135193, 0.13904221, -0.21608506, 0.29018146, 0.16236295, 0.25396034, 0.19324322, 0.08677476, -0.053531177, 0.14517988, -0.11484923, 0.15421726, 0.22026348, 
0.11952507, 0.010373934, -0.34372324, -0.23538907, 0.2590132, 0.32607847, 0.18607004, -0.041907027, 0.17356849, 0.11717049, 0.23005699, 0.14421561, -0.40986398, 0.04959026, 0.32919577, 0.11714556, 0.15356015, -0.05093626, -0.27187335, -0.2434729, -0.0744803, 0.046752095, -0.31802502, -0.13378103, 0.37520665, 0.04794983, -0.015131961, -0.15734115, -0.2253003, -0.006646415, -0.10654589, 0.03465094, 0.08154701, -0.0929395, -0.3991926, -0.09223294, -0.55187756, -0.13733594, 0.20702904, -0.30563506, 0.24306192, -0.30793938, 0.10908097, 0.37263408, 0.051739924, 0.013253078, -0.18778512, -0.010503004, 0.13689041, 0.33355665, 0.22972348, -0.38842443, 0.09157828, 0.16181372, 0.25097308, 0.12582561, -0.040875636, -0.13416474, 0.14245158, -0.19798084, 0.16056503, -0.20634986, 0.17811936, -0.23963411, -0.20601338, 0.2909662, -0.41837695, -0.043886326, 0.10675116, 0.2580096, 0.012383441, -0.020623814, -0.08108415, 0.11714205, 0.18568753, 0.13704987, -0.38517642, 0.27483293, -0.027029455, -0.029421767, -0.03520134, 0.17171104, 0.25429082, 0.096434996, -0.40192246, -0.15646617, 0.108028576, 0.27889878, -0.22277395, 0.16855176, -0.3042606, -0.3762433, -0.1455701, 0.18953793, 0.21115941, 0.15629233, -0.26154017, 0.18894924, -0.09465889, -0.44113842, -0.36655298, -0.109620884, 0.2422129, 0.16998006, 0.1736859, 0.24283548, 0.023980502, 0.1371494, 0.15047146, 0.19360293, -0.13204347, 0.17984484, -0.36500728, -0.05806427, -0.26840413, -0.19872135, -0.23028538, 0.3935414, -0.19386484, 0.23332444, 0.39288434, -0.30275175, -0.10890826, 0.15745756, 0.10137965, 0.06952018, -0.13416727, 0.21371306, 0.17559311, -0.101447366, 0.23363695, -0.0016540436, 0.27593082, 0.19691755, 0.10001489, 0.14923371, 0.13464274, -0.15578793, 0.05758963, 0.018758772, -0.018384326, -0.24449226, -0.13423687, 0.22088864, -0.0455652, 0.027285956, -0.1665838, -0.107885145, 0.021460643, 0.3850578, -0.35932088, 0.26157242, 0.07409609, 0.14653832, -0.22538042, -0.20356905, 0.065705076, 0.18143511, -0.40029642, 
-0.014136726, 0.15440395, 0.10932906, 0.21850891, 0.2580729, -0.00988111, -0.120041765, 0.4927309, -0.14106606, -0.12887348, 0.278349, -0.26594374, -0.27160963, 0.2535723, -0.01851703, 0.30484486, 0.14516489, 0.030297654, 0.042642463, -0.6132387, 0.07002391, -0.4603895, -0.012375533, 0.022541162, -0.09806742, -0.18119614, 0.13785718, 0.2639673, -0.2530231, -0.02733121, 0.20391333, 0.07542794, -0.09949427, 0.48948997, -0.014352312, 0.21734369, -0.06898155, 0.2560105, -0.1990966, 0.27983844, -0.27486038, -0.113962464, 0.015387479, 0.0917569, 0.05042972, -0.05722061, -0.33860856, 0.20994869, 0.010669914, -0.042475466, -0.06582815, 0.09746496, -0.018288288, 0.05784519, 0.0482757, 0.32611609, 0.20912243, -0.018082418, -0.380203, -0.024771776, -0.10463602, 0.040596716, 0.027981333, -0.033508264, 0.43150952, -0.1027142, 0.014048678, -0.1371664, 0.26183385, 0.2007899, 0.1397189, 0.12691422, 0.092273235, 0.15318273, -0.036203247, -0.0019277359, -0.122955844, -0.23079583, -0.27070555, 0.21413256, -0.22849205, -0.15937866, 0.16445585, 0.2226356, -0.13215786, 0.16300718, 0.3076105, 0.10777309, -0.15043095, 0.25800404, -0.10401015, 0.11895908, 0.31831232, -0.027889991, 0.16663, 0.50905895, 0.20906426, -0.3731108, -0.015852178, -0.21458037, -0.008900974, 0.23457617, -0.13320251, 0.17811044, 0.38757527, 0.31200206, 0.45592153, -0.017489515, -0.10929343, 0.08955372, 0.22110991, 0.021531774, -0.1519175, -0.1773438, 0.2732921, 0.06679491, -0.14536235, -0.021941066, -0.14891659, 0.016744306, -0.12736456, -0.39618, 0.039470803, 0.20796895, -0.4877525, 0.1055665, -0.2754277, 0.04144014, -0.22956206, 0.20604385, -0.22717506, -0.10176536, 0.39214277, -0.08812965, 0.029923324, -0.14861423, -0.15354954, 0.01690382, 0.030926324, -0.04741114, -0.023319794, 0.3530178, -0.11661981, 0.026740639, 0.02915204, 0.20712481, -0.062735416, 0.16828942, 0.027333798, -0.143883, -0.38526252, 0.11417671, -0.20366596, -0.44497097, -0.3797921, 0.36764264, -0.15239948, -0.24930836, -0.22178406, -0.26763546, 
0.059928652, 0.18547069, 0.46448106, -0.38581342, -0.0952114, 0.50129086, -0.07868489, -0.16569282, 0.28730562, 0.1784999, -0.3028391, 0.32795244, 0.26607093, -0.04681752, 0.028598929, 0.5256846, 0.11711999, 0.19183147, -0.21013169, 0.44537762, -0.22686206, 0.31831375, -0.14511606, -0.19815892, -0.22267957, 0.014032256, 0.32726866, 0.20925169, -0.42334914, -0.11525112, 0.03519667, 0.35435578, -0.386533, -0.061628006, -0.00946854, -0.33106926, 0.1413482, 0.08982125, 0.21938783, -0.36983567, -0.013867053, 0.40048254, -0.28595185, 0.12054411, 0.31387663, 0.06425676, 0.36636186, -0.02155459, -0.0055017034, 0.027993213, -0.2304683, -0.014896044, 0.13527954, 0.57070816, 0.15139924, -0.38313928, 0.09329611, 0.24846943, -0.18317448, 0.3021817, -0.09956795, -0.031919435, 0.26749623, -0.03647182, 0.13850948, -0.08908831, -0.23334776, -0.3168528, 0.38110238, -0.19650906, -0.10419602, -0.16122277, -0.10340526, -0.16876405, 0.039111413, -0.3708464, 0.31809127, 0.14100179, -0.18282901, -0.07942389, -0.089590766, -0.14431885, -0.22517395, -0.25504246, 0.4375917, -0.15872298, -0.4546147, 0.26022837, 0.03248598, 0.35889503, 0.048524577, 0.088019766, -0.06272402, 0.15277462, 0.08405457, -0.12085052, 0.2635839, 0.06471835, -0.53696585, -0.12954685, -0.20841905, 0.08119028, 0.204219, -0.3485882, 0.03504887, 0.010403169, 0.14514661, 0.00751219, -0.098806985, -0.07772141, 0.40441778, 0.24756968, 0.3027828, 0.09314904, 0.23755553, -0.031587124, -0.33195436, 0.043194465, 0.08573535, -0.18050656, 0.4158775, -0.07615283, -0.3771687, -0.064762294, 0.40367717, 0.09764434, 0.02901462, -0.04842578, 0.21205749, 0.16108298, -0.14622071, 0.17626472, -0.029647602, -0.13114613, -0.098030314, 0.07949154, -0.21446016, 0.0422025, -0.15453711, -0.0126237925, -0.20044088, 0.0046409313, -0.19496118, 0.2493613, -0.3019857, 0.07960567, 0.0483416, 0.28941196, -0.34885955, -0.16683857, -0.076981924, 0.14302921, 0.262528, 0.34287122, 0.026721897, 0.025925094, -0.17189877, -0.24605455, 0.050659895, -0.21924202, 
0.14178936, 0.08741318, 0.2537666, -0.31009108, -0.18676534, 0.2118603, -0.09943448, -0.14400531, 0.39799818, 0.2653751, 0.20913126, 0.012621317, 0.25341883, 0.026780201, -0.1865512, -0.12074198, -0.25787765, 0.073576815, -0.08996679, -0.059922904, -0.06613629, -0.11565135, -0.19995238, -0.15129463, 0.1596153, 0.14251925, 0.025685819, -0.05717514, -0.012424833, -0.28294817, 0.32567903, 0.025750922, 0.06216015, -0.05263431, 0.045462035, -0.13553426, 0.23230933, 0.21270712, 0.075220734, -0.18877773, -0.0791943, -0.28871432, -0.35735613, 0.063926004, 0.12319752, 0.108247474, -0.083363935, -0.24347112, -0.004402367, -0.15542541, 0.19257565, 0.022904381, -0.15720096, -0.08451907, -0.04471166, -0.024871575, 0.07304403, -0.19043835, -0.19255094, -0.110913895, -0.07317639, -0.0738659, 0.3432893, -0.05318059, 0.2751972, -0.15310696, -0.0040784623, -0.17644951, 0.10200317, -0.061957322, 0.05185596, 0.2646503, -0.44402647, -0.1641448, -0.030240616, -0.18341458, -0.14352442, -0.082672276, -0.024050146, 0.24113926, -0.36153135, 0.22421445, -0.13683006, 0.19264282, -0.07566274, -0.2577963, -0.14437243, -0.005780896, 0.2361684, -0.34697405, -0.2580383, -0.26091143, -0.09685133, -0.078017235, -0.26482865, 0.41297123, -0.107337065, -0.07583126, 0.007925678, 0.46675873, 0.21356691, 0.16632208, 0.22148071, -0.032334905, 0.03056415, 0.09642622, -0.45709848, 0.24851573, -0.2539892, -0.12904367, 0.016261106, 0.12153378, -0.0012138055, 0.025865352, -0.13303627, -0.09864319, 0.21006866, -0.35695034, -0.045082778, 0.25232777, 0.14546284, -0.24803042, 0.02825165, 0.12093924, 0.37656778, 0.08200147, -0.22424765, 0.10366332, -0.3352094, -0.017510755, -0.19172588, -0.30078256, 0.16272469, -0.06270328, 0.07839331, -0.098105066, -0.30520752, 0.2076339, -0.08514935, -0.06716773, 0.4287407, 0.02735758, -0.13251884, 0.14675677, 0.029392201, 0.032187577, -0.098731965, 0.28921035, 0.23158008, -0.28971958, 0.15396328, -0.14108004, -0.057366837, -0.07640423]}] |1510101612 
|[(1634839239,0.12448559273510636),(1274183715,0.36788497133113396),(-612640902,0.3851201869623605),(1293373212,0.3979194244143298),(-1320876223,0.3994126224276987),(-1719102856,0.4043799951515284),(-1548374770,0.41026101952006294)] |1634839239 |0.12448559273510636 |\n", - "|Second document, this is my second sentence. This is my second sentence. |[{document, 0, 71, Second document, this is my second sentence. This is my second sentence., {sentence -> 0}, []}] |[{document, 0, 43, Second document, this is my second sentence., {sentence -> 0}, []}, {document, 45, 71, This is my second sentence., {sentence -> 1}, []}]|[{token, 0, 5, Second, {sentence -> 0}, []}, {token, 7, 14, document, {sentence -> 0}, []}, {token, 15, 15, ,, {sentence -> 0}, []}, {token, 17, 20, this, {sentence -> 0}, []}, {token, 22, 23, is, {sentence -> 0}, []}, {token, 25, 26, my, {sentence -> 0}, []}, {token, 28, 33, second, {sentence -> 0}, []}, {token, 35, 42, sentence, {sentence -> 0}, []}, {token, 43, 43, ., {sentence -> 0}, []}, {token, 45, 48, This, {sentence -> 1}, []}, {token, 50, 51, is, {sentence -> 1}, []}, {token, 53, 54, my, {sentence -> 1}, []}, {token, 56, 61, second, {sentence -> 1}, []}, {token, 63, 70, sentence, {sentence -> 1}, []}, {token, 71, 71, ., {sentence -> 1}, []}] |[{sentence_embeddings, 0, 71, Second document, this is my second sentence. This is my second sentence., {sentence -> 0, token -> Second document, this is my second sentence. 
This is my second sentence., pieceId -> -1, isWordStart -> true}, [-7.9203E-4, -0.19994189, -0.21818015, -0.068899736, 0.12664562, 0.1954791, 0.25884947, -0.08906762, -0.096521795, -0.15146676, 0.23279426, 0.02231225, -0.091652475, 0.088964544, -0.13240255, 0.48075354, 0.22785556, -0.45156693, 0.062804036, -0.020177238, -0.25701693, 0.055076845, 0.4630905, 0.34240133, 0.09377383, 0.054915123, -0.14161351, -0.017130367, 0.1733012, 0.23704202, 0.28406826, 0.06854082, 0.10759672, 0.258621, -0.23658031, 0.06974518, -0.30307853, 0.010062803, 0.24419631, -0.17857389, -0.061605684, 0.15594025, 0.20267735, -0.14955261, -0.11479992, 0.39396986, 0.23352055, 0.012666446, -0.0962206, -0.09166719, -0.35843217, 0.33310696, 0.28614143, 0.19793509, -0.014299779, 0.03125637, -0.15131183, 0.26195115, -0.092621945, -0.09774493, -0.10871576, -0.20030053, -0.014465115, -0.058903776, 0.010456275, -0.12946285, 0.090752184, -0.13612632, -0.15917492, 0.073066816, -0.07082851, 0.15889694, 0.16751619, -0.31353, -0.28896278, 0.061326317, -0.5799537, -0.11939915, 0.28790224, 0.44152555, -0.119611256, 0.19590247, 0.051425744, 0.21189548, -0.01531638, -0.046113886, -0.056209974, -0.10596114, 0.18074782, 0.27465618, -0.20207298, -0.38995308, 0.055122897, 0.023203688, -0.0892057, 0.0093817655, -0.014075832, -0.06670877, -0.15410645, -0.1759522, 0.083973736, -0.24712972, -0.13847028, 0.26776898, -0.03092194, -0.20474298, -0.010832185, 0.28615507, 0.06926395, -0.09348502, -0.15698338, 0.41989443, 0.3010278, 0.005741507, 0.010479024, 0.16931498, 0.12767749, -0.29227796, 0.42611268, -0.30473405, -0.0071336213, -0.100375555, 0.09961725, 0.14721175, -0.21532084, 0.28975573, 0.15805171, 0.2587544, 0.18892317, 0.08308744, -0.053409446, 0.14420575, -0.117936045, 0.16155364, 0.21884927, 0.11623547, 0.016398214, -0.34012944, -0.23610827, 0.25944826, 0.3265321, 0.17884685, -0.029824225, 0.17586859, 0.11585, 0.22395201, 0.1441974, -0.40962303, 0.055336952, 0.33760974, 0.11725796, 0.1549155, -0.057688136, 
-0.27527377, -0.23793808, -0.06886171, 0.04727421, -0.32366428, -0.124764286, 0.37829912, 0.044418834, -0.014433529, -0.15863699, -0.23130025, -0.014072199, -0.116705395, 0.026241766, 0.082573965, -0.089801684, -0.39718676, -0.09980003, -0.5494743, -0.1430713, 0.21158108, -0.30317372, 0.24022597, -0.3023537, 0.111940525, 0.37496758, 0.05516233, 0.011221796, -0.18284394, -0.0047331, 0.13796207, 0.32586053, 0.23857506, -0.38834578, 0.09571896, 0.15859152, 0.2515127, 0.12696691, -0.037285045, -0.13651451, 0.13980338, -0.198333, 0.15924208, -0.20204458, 0.18000285, -0.24857216, -0.20852609, 0.29131892, -0.41542646, -0.04368751, 0.10883073, 0.26118433, 0.013824453, -0.027603198, -0.08514061, 0.117200404, 0.18326314, 0.14687406, -0.3891525, 0.27375975, -0.02553157, -0.033559944, -0.038643405, 0.1725868, 0.24817786, 0.10062808, -0.3984106, -0.15110281, 0.107347734, 0.2811384, -0.22248136, 0.17181505, -0.30180675, -0.37590006, -0.13890691, 0.1929018, 0.21564123, 0.14888352, -0.2589628, 0.18979052, -0.09733246, -0.44260895, -0.3626373, -0.10656619, 0.24600953, 0.17717755, 0.17176694, 0.24860108, 0.02450866, 0.13089818, 0.14760958, 0.1889736, -0.13462782, 0.17552358, -0.36474925, -0.054144062, -0.2719437, -0.20575465, -0.2259518, 0.390171, -0.1989032, 0.23816091, 0.39332652, -0.30287528, -0.11111271, 0.15751657, 0.10828888, 0.061609615, -0.13818201, 0.21809433, 0.17633602, -0.10041227, 0.22838311, -0.0037272298, 0.27824283, 0.19050625, 0.09929577, 0.14321278, 0.12866658, -0.1560034, 0.054702457, 0.0034277993, -0.023457147, -0.24974422, -0.12811284, 0.2138116, -0.043564834, 0.024615834, -0.17061573, -0.105848975, 0.01979917, 0.3876299, -0.3644679, 0.25748017, 0.0794696, 0.15106939, -0.2271831, -0.2029017, 0.06485374, 0.18893777, -0.40528575, -0.0067824377, 0.15022416, 0.11026635, 0.21823986, 0.25255138, -0.009357705, -0.11272444, 0.48919556, -0.14951092, -0.12166809, 0.28113428, -0.26702005, -0.2696553, 0.25774094, -0.020699237, 0.30593178, 0.13539925, 0.023027057, 
0.04737817, -0.60946774, 0.07452417, -0.4618816, -0.012704029, 0.017143121, -0.0928661, -0.18195343, 0.13803084, 0.26418334, -0.25597656, -0.028610228, 0.20601481, 0.06913206, -0.105633095, 0.48713952, -0.017569678, 0.21389663, -0.06817315, 0.26016077, -0.2034709, 0.27557772, -0.27667582, -0.11112172, 0.017718645, 0.084379435, 0.043280818, -0.059255335, -0.3404926, 0.22076394, 0.0044043344, -0.037489407, -0.06305672, 0.09715581, -0.023487065, 0.062111195, 0.05335917, 0.32398656, 0.2121781, -0.015352369, -0.37581238, -0.02546437, -0.1003463, 0.049476393, 0.01883333, -0.03315766, 0.43547428, -0.10425473, 0.011709515, -0.13826483, 0.26741698, 0.20992693, 0.14296642, 0.12905589, 0.091058806, 0.15014836, -0.033118833, -0.0050203684, -0.1263514, -0.2340833, -0.27371702, 0.21375184, -0.22322327, -0.15550363, 0.16027404, 0.22693431, -0.12760872, 0.16786651, 0.30304092, 0.1020746, -0.15578815, 0.25656548, -0.101949446, 0.127458, 0.31914127, -0.029818093, 0.17126171, 0.5050371, 0.2140625, -0.36993846, -0.019031882, -0.21702774, -0.0075844345, 0.23762833, -0.13128382, 0.17342623, 0.3832189, 0.30292338, 0.45272982, -0.014391475, -0.10896908, 0.092533424, 0.22071956, 0.023630338, -0.15142313, -0.16476907, 0.2736263, 0.06355073, -0.14254087, -0.017445322, -0.15101986, 0.018103518, -0.13215697, -0.39054778, 0.031270053, 0.20609067, -0.4899748, 0.10231394, -0.28053063, 0.042639177, -0.2338356, 0.21435437, -0.23163229, -0.10278779, 0.38594186, -0.08828131, 0.033451065, -0.15233083, -0.14713119, 0.015353501, 0.024095697, -0.04088602, -0.021885296, 0.35037458, -0.12262792, 0.029172843, 0.031767074, 0.20502482, -0.059423614, 0.17006303, 0.03032189, -0.14278243, -0.38293302, 0.12350028, -0.20444815, -0.446956, -0.38407224, 0.36807615, -0.14949287, -0.25289682, -0.22304098, -0.26412737, 0.054438926, 0.1866949, 0.46163344, -0.3870432, -0.09577562, 0.4925763, -0.07591358, -0.17125858, 0.29294312, 0.18506376, -0.3011424, 0.33213896, 0.27247593, -0.04371708, 0.022509042, 0.5281235, 
0.11490675, 0.18309964, -0.2098084, 0.44932392, -0.2238262, 0.32604268, -0.15083954, -0.19439663, -0.21758024, 0.016958022, 0.31868294, 0.20522523, -0.4294383, -0.1155165, 0.038857397, 0.35014954, -0.38389844, -0.06027076, -0.0022453207, -0.33463535, 0.14471063, 0.08705408, 0.21836443, -0.37401456, -0.018341504, 0.39743432, -0.28624484, 0.11763633, 0.31765255, 0.06626895, 0.36575744, -0.022255607, -0.010258075, 0.034847856, -0.23807333, -0.009544487, 0.13759036, 0.5633902, 0.15055975, -0.3893473, 0.099084534, 0.2487808, -0.18436259, 0.2964102, -0.0974038, -0.030039463, 0.26750058, -0.0327042, 0.13959204, -0.09072471, -0.23449129, -0.31150666, 0.37843606, -0.20243177, -0.10671544, -0.15774731, -0.100252956, -0.17092793, 0.043813135, -0.3745126, 0.324973, 0.13483994, -0.17662391, -0.082839765, -0.095041975, -0.13983752, -0.2234264, -0.25730914, 0.43138906, -0.15586619, -0.45626152, 0.25078535, 0.032389496, 0.3605528, 0.043308545, 0.082841, -0.05712051, 0.15781389, 0.09113666, -0.12152442, 0.26758134, 0.07198326, -0.5393054, -0.12838638, -0.20900357, 0.08293295, 0.20305818, -0.34222803, 0.030090628, 0.011068957, 0.15267001, 0.0166323, -0.09774065, -0.07362094, 0.4034169, 0.24240533, 0.29080132, 0.093652435, 0.22641939, -0.027256148, -0.33472311, 0.0421845, 0.084676325, -0.18856609, 0.41979697, -0.0742718, -0.37898067, -0.06616097, 0.40372992, 0.097068146, 0.028025001, -0.053321853, 0.21186478, 0.1617424, -0.14168897, 0.17353843, -0.026624104, -0.13292201, -0.10193648, 0.082835816, -0.2110324, 0.047005363, -0.15010522, -0.011524656, -0.21158908, 0.013927639, -0.20468919, 0.2549774, -0.30430713, 0.08803344, 0.04733768, 0.29572278, -0.34726125, -0.16165416, -0.07808548, 0.13735756, 0.26037022, 0.34519985, 0.02743408, 0.017144704, -0.16387558, -0.24629596, 0.05140357, -0.2149917, 0.14342026, 0.09417347, 0.2471141, -0.3102873, -0.1811054, 0.21190523, -0.0939989, -0.14163078, 0.39620474, 0.26059932, 0.2017156, 0.016578814, 0.2596664, 0.023013765, -0.18858111, -0.12537004, 
-0.2575313, 0.07671513, -0.084907316, -0.05217875, -0.0572651, -0.11264922, -0.20340498, -0.15451567, 0.15938708, 0.13023944, 0.024635538, -0.04755041, -0.013276761, -0.27871516, 0.32686794, 0.025345188, 0.056448072, -0.05157638, 0.055301744, -0.14052545, 0.23356776, 0.21044475, 0.074066274, -0.19347996, -0.075556666, -0.2903639, -0.35939184, 0.06149839, 0.122687556, 0.10089751, -0.076755464, -0.23811671, -0.00608524, -0.15459716, 0.19323912, 0.024601229, -0.16377638, -0.085954376, -0.042363558, -0.026964856, 0.068481274, -0.19715573, -0.19233052, -0.1171513, -0.07371926, -0.06850815, 0.3406645, -0.045532167, 0.27094424, -0.14512636, -0.004277191, -0.17940599, 0.10672061, -0.0615622, 0.05335321, 0.26168355, -0.4460772, -0.16097912, -0.03129851, -0.18674865, -0.14036278, -0.08535438, -0.030956995, 0.2338251, -0.3615369, 0.21861416, -0.12684713, 0.18677162, -0.07666991, -0.25236502, -0.14463082, -0.011356218, 0.24449459, -0.3400799, -0.2528164, -0.2593893, -0.09919247, -0.06947468, -0.2653912, 0.41220212, -0.105682805, -0.06773623, 0.0067983535, 0.4691811, 0.21326056, 0.16177358, 0.22778879, -0.032021854, 0.030037731, 0.09594971, -0.45707417, 0.25093716, -0.24953341, -0.13460502, 0.012793443, 0.123196214, -0.007967554, 0.021584822, -0.13040859, -0.09390109, 0.21663658, -0.35579094, -0.04199463, 0.26041034, 0.14653488, -0.24590142, 0.025850672, 0.12220801, 0.38460156, 0.07916422, -0.2231287, 0.10919217, -0.3318051, -0.01628104, -0.19389302, -0.30398846, 0.16407341, -0.07596026, 0.077111095, -0.100624435, -0.30112073, 0.20480298, -0.079184264, -0.07005212, 0.42865524, 0.028614324, -0.12899904, 0.15557994, 0.021030005, 0.036799897, -0.099277705, 0.2806157, 0.23383352, -0.28756225, 0.15074758, -0.13403846, -0.05876428, -0.07657761]}] |[{doc_similarity_rankings, 0, 71, Second document, this is my second sentence. This is my second sentence., {pieceId -> -1, lshId -> 1634839239, isWordStart -> true, token -> Second document, this is my second sentence. 
This is my second sentence., lshNeighbors -> [(1510101612,0.12448559273510636),(1274183715,0.3554576544360366),(-612640902,0.37472233818858686),(-1548374770,0.39013800843493296),(-1719102856,0.3901714913624425),(1293373212,0.39846872824443047),(-1320876223,0.3992484826857293)], sentence -> 0}, [-7.9203E-4, -0.19994189, -0.21818015, -0.068899736, 0.12664562, 0.1954791, 0.25884947, -0.08906762, -0.096521795, -0.15146676, 0.23279426, 0.02231225, -0.091652475, 0.088964544, -0.13240255, 0.48075354, 0.22785556, -0.45156693, 0.062804036, -0.020177238, -0.25701693, 0.055076845, 0.4630905, 0.34240133, 0.09377383, 0.054915123, -0.14161351, -0.017130367, 0.1733012, 0.23704202, 0.28406826, 0.06854082, 0.10759672, 0.258621, -0.23658031, 0.06974518, -0.30307853, 0.010062803, 0.24419631, -0.17857389, -0.061605684, 0.15594025, 0.20267735, -0.14955261, -0.11479992, 0.39396986, 0.23352055, 0.012666446, -0.0962206, -0.09166719, -0.35843217, 0.33310696, 0.28614143, 0.19793509, -0.014299779, 0.03125637, -0.15131183, 0.26195115, -0.092621945, -0.09774493, -0.10871576, -0.20030053, -0.014465115, -0.058903776, 0.010456275, -0.12946285, 0.090752184, -0.13612632, -0.15917492, 0.073066816, -0.07082851, 0.15889694, 0.16751619, -0.31353, -0.28896278, 0.061326317, -0.5799537, -0.11939915, 0.28790224, 0.44152555, -0.119611256, 0.19590247, 0.051425744, 0.21189548, -0.01531638, -0.046113886, -0.056209974, -0.10596114, 0.18074782, 0.27465618, -0.20207298, -0.38995308, 0.055122897, 0.023203688, -0.0892057, 0.0093817655, -0.014075832, -0.06670877, -0.15410645, -0.1759522, 0.083973736, -0.24712972, -0.13847028, 0.26776898, -0.03092194, -0.20474298, -0.010832185, 0.28615507, 0.06926395, -0.09348502, -0.15698338, 0.41989443, 0.3010278, 0.005741507, 0.010479024, 0.16931498, 0.12767749, -0.29227796, 0.42611268, -0.30473405, -0.0071336213, -0.100375555, 0.09961725, 0.14721175, -0.21532084, 0.28975573, 0.15805171, 0.2587544, 0.18892317, 0.08308744, -0.053409446, 0.14420575, -0.117936045, 0.16155364, 
0.21884927, 0.11623547, 0.016398214, -0.34012944, -0.23610827, 0.25944826, 0.3265321, 0.17884685, -0.029824225, 0.17586859, 0.11585, 0.22395201, 0.1441974, -0.40962303, 0.055336952, 0.33760974, 0.11725796, 0.1549155, -0.057688136, -0.27527377, -0.23793808, -0.06886171, 0.04727421, -0.32366428, -0.124764286, 0.37829912, 0.044418834, -0.014433529, -0.15863699, -0.23130025, -0.014072199, -0.116705395, 0.026241766, 0.082573965, -0.089801684, -0.39718676, -0.09980003, -0.5494743, -0.1430713, 0.21158108, -0.30317372, 0.24022597, -0.3023537, 0.111940525, 0.37496758, 0.05516233, 0.011221796, -0.18284394, -0.0047331, 0.13796207, 0.32586053, 0.23857506, -0.38834578, 0.09571896, 0.15859152, 0.2515127, 0.12696691, -0.037285045, -0.13651451, 0.13980338, -0.198333, 0.15924208, -0.20204458, 0.18000285, -0.24857216, -0.20852609, 0.29131892, -0.41542646, -0.04368751, 0.10883073, 0.26118433, 0.013824453, -0.027603198, -0.08514061, 0.117200404, 0.18326314, 0.14687406, -0.3891525, 0.27375975, -0.02553157, -0.033559944, -0.038643405, 0.1725868, 0.24817786, 0.10062808, -0.3984106, -0.15110281, 0.107347734, 0.2811384, -0.22248136, 0.17181505, -0.30180675, -0.37590006, -0.13890691, 0.1929018, 0.21564123, 0.14888352, -0.2589628, 0.18979052, -0.09733246, -0.44260895, -0.3626373, -0.10656619, 0.24600953, 0.17717755, 0.17176694, 0.24860108, 0.02450866, 0.13089818, 0.14760958, 0.1889736, -0.13462782, 0.17552358, -0.36474925, -0.054144062, -0.2719437, -0.20575465, -0.2259518, 0.390171, -0.1989032, 0.23816091, 0.39332652, -0.30287528, -0.11111271, 0.15751657, 0.10828888, 0.061609615, -0.13818201, 0.21809433, 0.17633602, -0.10041227, 0.22838311, -0.0037272298, 0.27824283, 0.19050625, 0.09929577, 0.14321278, 0.12866658, -0.1560034, 0.054702457, 0.0034277993, -0.023457147, -0.24974422, -0.12811284, 0.2138116, -0.043564834, 0.024615834, -0.17061573, -0.105848975, 0.01979917, 0.3876299, -0.3644679, 0.25748017, 0.0794696, 0.15106939, -0.2271831, -0.2029017, 0.06485374, 0.18893777, -0.40528575, 
-0.0067824377, 0.15022416, 0.11026635, 0.21823986, 0.25255138, -0.009357705, -0.11272444, 0.48919556, -0.14951092, -0.12166809, 0.28113428, -0.26702005, -0.2696553, 0.25774094, -0.020699237, 0.30593178, 0.13539925, 0.023027057, 0.04737817, -0.60946774, 0.07452417, -0.4618816, -0.012704029, 0.017143121, -0.0928661, -0.18195343, 0.13803084, 0.26418334, -0.25597656, -0.028610228, 0.20601481, 0.06913206, -0.105633095, 0.48713952, -0.017569678, 0.21389663, -0.06817315, 0.26016077, -0.2034709, 0.27557772, -0.27667582, -0.11112172, 0.017718645, 0.084379435, 0.043280818, -0.059255335, -0.3404926, 0.22076394, 0.0044043344, -0.037489407, -0.06305672, 0.09715581, -0.023487065, 0.062111195, 0.05335917, 0.32398656, 0.2121781, -0.015352369, -0.37581238, -0.02546437, -0.1003463, 0.049476393, 0.01883333, -0.03315766, 0.43547428, -0.10425473, 0.011709515, -0.13826483, 0.26741698, 0.20992693, 0.14296642, 0.12905589, 0.091058806, 0.15014836, -0.033118833, -0.0050203684, -0.1263514, -0.2340833, -0.27371702, 0.21375184, -0.22322327, -0.15550363, 0.16027404, 0.22693431, -0.12760872, 0.16786651, 0.30304092, 0.1020746, -0.15578815, 0.25656548, -0.101949446, 0.127458, 0.31914127, -0.029818093, 0.17126171, 0.5050371, 0.2140625, -0.36993846, -0.019031882, -0.21702774, -0.0075844345, 0.23762833, -0.13128382, 0.17342623, 0.3832189, 0.30292338, 0.45272982, -0.014391475, -0.10896908, 0.092533424, 0.22071956, 0.023630338, -0.15142313, -0.16476907, 0.2736263, 0.06355073, -0.14254087, -0.017445322, -0.15101986, 0.018103518, -0.13215697, -0.39054778, 0.031270053, 0.20609067, -0.4899748, 0.10231394, -0.28053063, 0.042639177, -0.2338356, 0.21435437, -0.23163229, -0.10278779, 0.38594186, -0.08828131, 0.033451065, -0.15233083, -0.14713119, 0.015353501, 0.024095697, -0.04088602, -0.021885296, 0.35037458, -0.12262792, 0.029172843, 0.031767074, 0.20502482, -0.059423614, 0.17006303, 0.03032189, -0.14278243, -0.38293302, 0.12350028, -0.20444815, -0.446956, -0.38407224, 0.36807615, -0.14949287, -0.25289682, 
-0.22304098, -0.26412737, 0.054438926, 0.1866949, 0.46163344, -0.3870432, -0.09577562, 0.4925763, -0.07591358, -0.17125858, 0.29294312, 0.18506376, -0.3011424, 0.33213896, 0.27247593, -0.04371708, 0.022509042, 0.5281235, 0.11490675, 0.18309964, -0.2098084, 0.44932392, -0.2238262, 0.32604268, -0.15083954, -0.19439663, -0.21758024, 0.016958022, 0.31868294, 0.20522523, -0.4294383, -0.1155165, 0.038857397, 0.35014954, -0.38389844, -0.06027076, -0.0022453207, -0.33463535, 0.14471063, 0.08705408, 0.21836443, -0.37401456, -0.018341504, 0.39743432, -0.28624484, 0.11763633, 0.31765255, 0.06626895, 0.36575744, -0.022255607, -0.010258075, 0.034847856, -0.23807333, -0.009544487, 0.13759036, 0.5633902, 0.15055975, -0.3893473, 0.099084534, 0.2487808, -0.18436259, 0.2964102, -0.0974038, -0.030039463, 0.26750058, -0.0327042, 0.13959204, -0.09072471, -0.23449129, -0.31150666, 0.37843606, -0.20243177, -0.10671544, -0.15774731, -0.100252956, -0.17092793, 0.043813135, -0.3745126, 0.324973, 0.13483994, -0.17662391, -0.082839765, -0.095041975, -0.13983752, -0.2234264, -0.25730914, 0.43138906, -0.15586619, -0.45626152, 0.25078535, 0.032389496, 0.3605528, 0.043308545, 0.082841, -0.05712051, 0.15781389, 0.09113666, -0.12152442, 0.26758134, 0.07198326, -0.5393054, -0.12838638, -0.20900357, 0.08293295, 0.20305818, -0.34222803, 0.030090628, 0.011068957, 0.15267001, 0.0166323, -0.09774065, -0.07362094, 0.4034169, 0.24240533, 0.29080132, 0.093652435, 0.22641939, -0.027256148, -0.33472311, 0.0421845, 0.084676325, -0.18856609, 0.41979697, -0.0742718, -0.37898067, -0.06616097, 0.40372992, 0.097068146, 0.028025001, -0.053321853, 0.21186478, 0.1617424, -0.14168897, 0.17353843, -0.026624104, -0.13292201, -0.10193648, 0.082835816, -0.2110324, 0.047005363, -0.15010522, -0.011524656, -0.21158908, 0.013927639, -0.20468919, 0.2549774, -0.30430713, 0.08803344, 0.04733768, 0.29572278, -0.34726125, -0.16165416, -0.07808548, 0.13735756, 0.26037022, 0.34519985, 0.02743408, 0.017144704, -0.16387558, 
-0.24629596, 0.05140357, -0.2149917, 0.14342026, 0.09417347, 0.2471141, -0.3102873, -0.1811054, 0.21190523, -0.0939989, -0.14163078, 0.39620474, 0.26059932, 0.2017156, 0.016578814, 0.2596664, 0.023013765, -0.18858111, -0.12537004, -0.2575313, 0.07671513, -0.084907316, -0.05217875, -0.0572651, -0.11264922, -0.20340498, -0.15451567, 0.15938708, 0.13023944, 0.024635538, -0.04755041, -0.013276761, -0.27871516, 0.32686794, 0.025345188, 0.056448072, -0.05157638, 0.055301744, -0.14052545, 0.23356776, 0.21044475, 0.074066274, -0.19347996, -0.075556666, -0.2903639, -0.35939184, 0.06149839, 0.122687556, 0.10089751, -0.076755464, -0.23811671, -0.00608524, -0.15459716, 0.19323912, 0.024601229, -0.16377638, -0.085954376, -0.042363558, -0.026964856, 0.068481274, -0.19715573, -0.19233052, -0.1171513, -0.07371926, -0.06850815, 0.3406645, -0.045532167, 0.27094424, -0.14512636, -0.004277191, -0.17940599, 0.10672061, -0.0615622, 0.05335321, 0.26168355, -0.4460772, -0.16097912, -0.03129851, -0.18674865, -0.14036278, -0.08535438, -0.030956995, 0.2338251, -0.3615369, 0.21861416, -0.12684713, 0.18677162, -0.07666991, -0.25236502, -0.14463082, -0.011356218, 0.24449459, -0.3400799, -0.2528164, -0.2593893, -0.09919247, -0.06947468, -0.2653912, 0.41220212, -0.105682805, -0.06773623, 0.0067983535, 0.4691811, 0.21326056, 0.16177358, 0.22778879, -0.032021854, 0.030037731, 0.09594971, -0.45707417, 0.25093716, -0.24953341, -0.13460502, 0.012793443, 0.123196214, -0.007967554, 0.021584822, -0.13040859, -0.09390109, 0.21663658, -0.35579094, -0.04199463, 0.26041034, 0.14653488, -0.24590142, 0.025850672, 0.12220801, 0.38460156, 0.07916422, -0.2231287, 0.10919217, -0.3318051, -0.01628104, -0.19389302, -0.30398846, 0.16407341, -0.07596026, 0.077111095, -0.100624435, -0.30112073, 0.20480298, -0.079184264, -0.07005212, 0.42865524, 0.028614324, -0.12899904, 0.15557994, 0.021030005, 0.036799897, -0.099277705, 0.2806157, 0.23383352, -0.28756225, 0.15074758, -0.13403846, -0.05876428, -0.07657761]}] 
|1634839239 |[(1510101612,0.12448559273510636),(1274183715,0.3554576544360366),(-612640902,0.37472233818858686),(-1548374770,0.39013800843493296),(-1719102856,0.3901714913624425),(1293373212,0.39846872824443047),(-1320876223,0.3992484826857293)]|1510101612 |0.12448559273510636 |\n", - "|Third document, climate change is arguably one of the most pressing problems of our time. |[{document, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {sentence -> 0}, []}] |[{document, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {sentence -> 0}, []}] |[{token, 0, 4, Third, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 22, climate, {sentence -> 0}, []}, {token, 24, 29, change, {sentence -> 0}, []}, {token, 31, 32, is, {sentence -> 0}, []}, {token, 34, 41, arguably, {sentence -> 0}, []}, {token, 43, 45, one, {sentence -> 0}, []}, {token, 47, 48, of, {sentence -> 0}, []}, {token, 50, 52, the, {sentence -> 0}, []}, {token, 54, 57, most, {sentence -> 0}, []}, {token, 59, 66, pressing, {sentence -> 0}, []}, {token, 68, 75, problems, {sentence -> 0}, []}, {token, 77, 78, of, {sentence -> 0}, []}, {token, 80, 82, our, {sentence -> 0}, []}, {token, 84, 87, time, {sentence -> 0}, []}, {token, 88, 88, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {sentence -> 0, token -> Third document, climate change is arguably one of the most pressing problems of our time., pieceId -> -1, isWordStart -> true}, [-0.008408386, -0.20978682, -0.21336395, -0.06701713, 0.14906062, 0.19436543, 0.2689835, -0.0736583, -0.085654296, -0.17641146, 0.23118123, 9.153709E-4, -0.095650904, 0.09745815, -0.12966505, 0.4991491, 0.21626908, -0.46584547, 0.036485918, -0.03004483, -0.2527194, 0.066779986, 0.4715149, 0.3309422, 0.10767734, 0.0772766, 
-0.13837156, -0.04646236, 0.18541306, 0.24775545, 0.29829848, 0.06701967, 0.10679874, 0.2371618, -0.2348681, 0.060263738, -0.30324578, 0.011110125, 0.25174752, -0.21265753, -0.07388571, 0.16692835, 0.21960892, -0.17161724, -0.10685435, 0.40878117, 0.22626911, 0.02965304, -0.12355306, -0.08647978, -0.36791545, 0.3402064, 0.29458448, 0.18367597, 0.018515693, 0.016963914, -0.16416565, 0.23452581, -0.08742108, -0.10209338, -0.11075058, -0.21584131, -0.0066427714, -0.057909314, 0.02990299, -0.1532408, 0.10471724, -0.15802106, -0.1316932, 0.0671004, -0.08458309, 0.14720063, 0.17743425, -0.29781777, -0.25875703, 0.052992765, -0.601354, -0.12203182, 0.28960162, 0.43318143, -0.11552292, 0.189184, 0.058978103, 0.22802205, -0.01887668, -0.06247763, -0.055835284, -0.10375289, 0.1687678, 0.28621984, -0.20247233, -0.39158803, 0.047121175, 0.03066416, -0.09131708, 0.0064128875, -0.008600525, -0.07319706, -0.17605102, -0.21015038, 0.09048563, -0.27109554, -0.14635244, 0.25777438, -0.03020101, -0.2038186, -0.015589433, 0.29672548, 0.078363374, -0.09256426, -0.17258734, 0.41875502, 0.32983577, -0.007111206, 0.028960332, 0.17240305, 0.11726792, -0.2967749, 0.42062518, -0.30623642, -0.018673455, -0.1085469, 0.13539384, 0.15505964, -0.2071793, 0.29197103, 0.13746233, 0.2660806, 0.19184366, 0.09343793, -0.03761699, 0.1458473, -0.118322745, 0.13873918, 0.20815493, 0.11604553, -0.0011643076, -0.35143498, -0.21321355, 0.2749992, 0.35669494, 0.1721764, -0.05118688, 0.19282715, 0.1083012, 0.20331307, 0.14773382, -0.400585, 0.04467102, 0.3526557, 0.12134734, 0.15779114, -0.07686861, -0.2722919, -0.25330427, -0.08266977, 0.051231362, -0.32711402, -0.13077538, 0.38633937, 0.029779907, 0.002141976, -0.15053126, -0.21427684, -0.03073989, -0.12185571, 0.030493725, 0.09356483, -0.08676565, -0.4158287, -0.11258328, -0.55825645, -0.12975422, 0.1916057, -0.3010512, 0.25087264, -0.28968638, 0.0923594, 0.39556426, 0.04983867, -0.0029593082, -0.19394685, -0.025053008, 0.1306449, 0.3228945, 0.23752072, 
-0.39854398, 0.09963036, 0.17405853, 0.260578, 0.13261874, -0.042916767, -0.13150778, 0.14103265, -0.18873711, 0.18550456, -0.21395086, 0.16902032, -0.2656128, -0.2184284, 0.28208458, -0.40157455, -0.031665407, 0.065816626, 0.27281633, 0.0017664516, -0.056637276, -0.08971444, 0.091889165, 0.18004844, 0.14723916, -0.40347266, 0.28464735, -0.029195225, -0.03000159, -0.024617586, 0.196627, 0.25832683, 0.10727102, -0.3850497, -0.14131209, 0.11651645, 0.28590408, -0.21434435, 0.18322834, -0.29039705, -0.3833519, -0.12992118, 0.20409176, 0.21254545, 0.17306952, -0.25796944, 0.18709883, -0.106259346, -0.43154985, -0.36494273, -0.11600778, 0.24581338, 0.18781164, 0.18708797, 0.25258115, 0.04334878, 0.11094503, 0.16887024, 0.18969853, -0.16395922, 0.17028388, -0.3653791, -0.062085465, -0.27042845, -0.20845734, -0.22975564, 0.38791245, -0.21153997, 0.22299886, 0.4067098, -0.3180967, -0.119020626, 0.16592985, 0.08134824, 0.07111609, -0.11281331, 0.21532886, 0.17626029, -0.11157626, 0.24869683, 0.002167793, 0.25803974, 0.17146061, 0.11219464, 0.14177255, 0.13628139, -0.14174575, 0.040052604, 0.0049264827, -0.027609568, -0.25137493, -0.14084421, 0.20105876, -0.058221187, 0.030821402, -0.15669239, -0.097033136, 0.02064872, 0.40998495, -0.3793549, 0.26364204, 0.09185891, 0.13761812, -0.23594798, -0.21576473, 0.064160846, 0.18441695, -0.39402333, 0.011562835, 0.15811875, 0.10871025, 0.2278601, 0.26277956, -0.011325682, -0.12331526, 0.4965102, -0.15407956, -0.13325658, 0.27157038, -0.25075117, -0.27684662, 0.28200155, -0.010683825, 0.30480832, 0.13320027, 0.031010821, 0.07333867, -0.6036074, 0.06996156, -0.4761334, -0.0033244402, 0.03359116, -0.097072996, -0.19554745, 0.15043412, 0.29090378, -0.24856627, -0.02541361, 0.2121456, 0.08295338, -0.09810014, 0.4901113, -0.022632308, 0.22354102, -0.060473014, 0.26167145, -0.21305074, 0.25782862, -0.26308486, -0.09908113, 0.007932383, 0.08765336, 0.052684747, -0.05878163, -0.31929326, 0.21060982, -0.006360022, -0.035593312, -0.04580954, 
0.092614815, -0.010202253, 0.053435646, 0.06810382, 0.33141363, 0.23865972, -0.022269802, -0.35638827, -0.017010234, -0.09842304, 0.044951804, 0.013101696, 0.012481737, 0.4473684, -0.07812174, 0.008848828, -0.10374172, 0.26849997, 0.19759308, 0.14334072, 0.12648976, 0.085015506, 0.16048867, -0.04428103, -0.02241106, -0.15643533, -0.2261526, -0.27816802, 0.21147077, -0.23083511, -0.15736212, 0.16844489, 0.21279429, -0.116731785, 0.13067369, 0.3122624, 0.136315, -0.16236319, 0.266095, -0.118092746, 0.08463656, 0.3190953, -0.025155172, 0.17036186, 0.517092, 0.21172866, -0.37978435, -0.033721708, -0.20605312, -0.010164018, 0.260588, -0.16144288, 0.1739176, 0.37816438, 0.31039205, 0.44743317, -0.006673559, -0.11675396, 0.07569114, 0.20170745, 0.012370929, -0.17593956, -0.16927823, 0.26412624, 0.05698451, -0.14810121, -0.023506388, -0.16487631, 0.026122056, -0.14697117, -0.4044809, 0.04526173, 0.21642078, -0.4945617, 0.10950964, -0.29737392, 0.06892894, -0.2156567, 0.22239617, -0.23322712, -0.10665675, 0.39332575, -0.09363847, 0.050910737, -0.19002058, -0.1574706, 0.02213912, 0.036516193, -0.028725138, -0.017847586, 0.3557758, -0.12832177, 0.048198838, 4.3433104E-4, 0.22483422, -0.061355013, 0.18990348, 0.03564027, -0.1394448, -0.39040518, 0.15794353, -0.20287846, -0.4367065, -0.37070453, 0.35814124, -0.14648235, -0.26215193, -0.2346811, -0.27164423, 0.06335842, 0.18181078, 0.4646271, -0.39600596, -0.079446204, 0.49517226, -0.04852778, -0.16844611, 0.2967518, 0.19760415, -0.31121883, 0.34088603, 0.25469545, -0.018662168, 0.02773218, 0.51845956, 0.13568775, 0.18219014, -0.23326635, 0.46436208, -0.21790144, 0.31559125, -0.17255287, -0.20793658, -0.20586798, -0.001172778, 0.3308969, 0.17670253, -0.42809558, -0.10740692, 0.036743138, 0.35039994, -0.37736034, -0.06748311, 0.0051405686, -0.35943413, 0.1323299, 0.076594, 0.23542015, -0.38947168, -0.0032625643, 0.40785658, -0.3197368, 0.13108508, 0.31222928, 0.10504441, 0.376466, -0.039943226, 0.0075853164, 0.062115036, 
-0.23973337, -0.015673233, 0.124596596, 0.567843, 0.13057941, -0.40122274, 0.10246062, 0.25044444, -0.19272071, 0.3008771, -0.085480474, -0.04598082, 0.27369395, -0.024367249, 0.16280788, -0.09723148, -0.2331273, -0.31018296, 0.3681214, -0.20254564, -0.099116005, -0.16499473, -0.111495964, -0.13236894, 0.04578884, -0.36830792, 0.33352196, 0.118905395, -0.20436546, -0.08910314, -0.07557359, -0.15557393, -0.234037, -0.2726709, 0.43529606, -0.1588916, -0.44991276, 0.2639855, 0.03949102, 0.3693944, 0.016801286, 0.10393568, -0.069996394, 0.15449597, 0.08037712, -0.13697414, 0.28319156, 0.066741705, -0.5410128, -0.14622404, -0.2038448, 0.08338539, 0.2011268, -0.33919087, 0.04361721, 0.02101723, 0.14619805, 0.030959083, -0.09712685, -0.07434212, 0.403413, 0.22790873, 0.28365055, 0.09378035, 0.23105437, -0.029275687, -0.3173693, 0.0543687, 0.07409911, -0.2017265, 0.45038718, -0.08160741, -0.39471105, -0.066249356, 0.41151386, 0.08760141, 0.007330824, -0.066278905, 0.21011162, 0.1539203, -0.12142708, 0.1757174, -0.026292767, -0.14986737, -0.113564014, 0.07391096, -0.21108653, 0.06545459, -0.16722327, -0.006594374, -0.20676054, 0.016034845, -0.20714003, 0.2670048, -0.31555256, 0.10177538, 0.06680942, 0.28953385, -0.34272602, -0.1623317, -0.076802626, 0.15219814, 0.26613286, 0.34149784, 0.032789387, 0.028899977, -0.18126866, -0.2511501, 0.081135035, -0.21000236, 0.16121005, 0.08403291, 0.24493581, -0.32109466, -0.20124927, 0.20610592, -0.10798646, -0.14663638, 0.40788773, 0.24154414, 0.21223918, 0.0129725, 0.26252237, 0.025581531, -0.17492732, -0.12604713, -0.24954422, 0.07021474, -0.078310095, -0.051400866, -0.035224427, -0.11225072, -0.1952861, -0.17365551, 0.14578226, 0.14051022, 0.022287847, -0.057631906, -0.0139911715, -0.27890974, 0.32389244, 0.02819324, 0.05819198, -0.069384664, 0.049535263, -0.13024889, 0.23608364, 0.21746805, 0.077531695, -0.19169594, -0.06775553, -0.27078623, -0.35225046, 0.058470953, 0.12146965, 0.13006094, -0.06770603, -0.25335953, 0.007816828, 
-0.13747253, 0.17829442, 0.011401831, -0.16540085, -0.103149, -0.069404215, -0.03443399, 0.077324726, -0.19909632, -0.20789692, -0.10723613, -0.08203292, -0.07713148, 0.34724632, -0.06089041, 0.2798024, -0.14503583, -0.0020078092, -0.15062527, 0.13015121, -0.065569915, 0.056575716, 0.25992274, -0.46167943, -0.15914112, -0.026965503, -0.20004211, -0.141356, -0.077509604, -0.027742084, 0.22565441, -0.36088252, 0.2413167, -0.12192323, 0.18409002, -0.08263175, -0.25700292, -0.16171978, -0.0079028085, 0.24911962, -0.3509631, -0.25993952, -0.2676477, -0.08899839, -0.06546339, -0.26874635, 0.42236093, -0.12012787, -0.08681204, 0.0034873153, 0.4458711, 0.20714682, 0.17381635, 0.20979515, -0.0273073, 0.03752274, 0.11206485, -0.46285358, 0.22496869, -0.2342749, -0.12822811, 0.0051306086, 0.109438226, -0.02305657, 0.030262345, -0.12668712, -0.11175425, 0.21646728, -0.37218267, -0.06411932, 0.27182007, 0.16698217, -0.26499093, 0.042440798, 0.13600044, 0.37879592, 0.11507201, -0.2173002, 0.1320668, -0.32382184, -0.037356067, -0.20496605, -0.2866237, 0.16485146, -0.081692345, 0.06725929, -0.10196793, -0.28608373, 0.21656291, -0.064518094, -0.07937422, 0.42675838, 0.0076278546, -0.12415507, 0.14044033, 0.014213737, 0.030048521, -0.0996337, 0.2774318, 0.20182906, -0.2818618, 0.1473123, -0.1531179, -0.04014678, -0.10299498]}] |[{doc_similarity_rankings, 0, 88, Third document, climate change is arguably one of the most pressing problems of our time., {pieceId -> -1, lshId -> -612640902, isWordStart -> true, token -> Third document, climate change is arguably one of the most pressing problems of our time., lshNeighbors -> [(1274183715,0.12201215887654807),(-1719102856,0.2991777399965483),(-1548374770,0.31909423657258823),(-1320876223,0.32308714836804664),(1293373212,0.3656377678477694),(1634839239,0.37472233818858686),(1510101612,0.3851201869623605)], sentence -> 0}, [-0.008408386, -0.20978682, -0.21336395, -0.06701713, 0.14906062, 0.19436543, 0.2689835, -0.0736583, -0.085654296, 
-0.17641146, 0.23118123, 9.153709E-4, -0.095650904, 0.09745815, -0.12966505, 0.4991491, 0.21626908, -0.46584547, 0.036485918, -0.03004483, -0.2527194, 0.066779986, 0.4715149, 0.3309422, 0.10767734, 0.0772766, -0.13837156, -0.04646236, 0.18541306, 0.24775545, 0.29829848, 0.06701967, 0.10679874, 0.2371618, -0.2348681, 0.060263738, -0.30324578, 0.011110125, 0.25174752, -0.21265753, -0.07388571, 0.16692835, 0.21960892, -0.17161724, -0.10685435, 0.40878117, 0.22626911, 0.02965304, -0.12355306, -0.08647978, -0.36791545, 0.3402064, 0.29458448, 0.18367597, 0.018515693, 0.016963914, -0.16416565, 0.23452581, -0.08742108, -0.10209338, -0.11075058, -0.21584131, -0.0066427714, -0.057909314, 0.02990299, -0.1532408, 0.10471724, -0.15802106, -0.1316932, 0.0671004, -0.08458309, 0.14720063, 0.17743425, -0.29781777, -0.25875703, 0.052992765, -0.601354, -0.12203182, 0.28960162, 0.43318143, -0.11552292, 0.189184, 0.058978103, 0.22802205, -0.01887668, -0.06247763, -0.055835284, -0.10375289, 0.1687678, 0.28621984, -0.20247233, -0.39158803, 0.047121175, 0.03066416, -0.09131708, 0.0064128875, -0.008600525, -0.07319706, -0.17605102, -0.21015038, 0.09048563, -0.27109554, -0.14635244, 0.25777438, -0.03020101, -0.2038186, -0.015589433, 0.29672548, 0.078363374, -0.09256426, -0.17258734, 0.41875502, 0.32983577, -0.007111206, 0.028960332, 0.17240305, 0.11726792, -0.2967749, 0.42062518, -0.30623642, -0.018673455, -0.1085469, 0.13539384, 0.15505964, -0.2071793, 0.29197103, 0.13746233, 0.2660806, 0.19184366, 0.09343793, -0.03761699, 0.1458473, -0.118322745, 0.13873918, 0.20815493, 0.11604553, -0.0011643076, -0.35143498, -0.21321355, 0.2749992, 0.35669494, 0.1721764, -0.05118688, 0.19282715, 0.1083012, 0.20331307, 0.14773382, -0.400585, 0.04467102, 0.3526557, 0.12134734, 0.15779114, -0.07686861, -0.2722919, -0.25330427, -0.08266977, 0.051231362, -0.32711402, -0.13077538, 0.38633937, 0.029779907, 0.002141976, -0.15053126, -0.21427684, -0.03073989, -0.12185571, 0.030493725, 0.09356483, -0.08676565, 
-0.4158287, -0.11258328, -0.55825645, -0.12975422, 0.1916057, -0.3010512, 0.25087264, -0.28968638, 0.0923594, 0.39556426, 0.04983867, -0.0029593082, -0.19394685, -0.025053008, 0.1306449, 0.3228945, 0.23752072, -0.39854398, 0.09963036, 0.17405853, 0.260578, 0.13261874, -0.042916767, -0.13150778, 0.14103265, -0.18873711, 0.18550456, -0.21395086, 0.16902032, -0.2656128, -0.2184284, 0.28208458, -0.40157455, -0.031665407, 0.065816626, 0.27281633, 0.0017664516, -0.056637276, -0.08971444, 0.091889165, 0.18004844, 0.14723916, -0.40347266, 0.28464735, -0.029195225, -0.03000159, -0.024617586, 0.196627, 0.25832683, 0.10727102, -0.3850497, -0.14131209, 0.11651645, 0.28590408, -0.21434435, 0.18322834, -0.29039705, -0.3833519, -0.12992118, 0.20409176, 0.21254545, 0.17306952, -0.25796944, 0.18709883, -0.106259346, -0.43154985, -0.36494273, -0.11600778, 0.24581338, 0.18781164, 0.18708797, 0.25258115, 0.04334878, 0.11094503, 0.16887024, 0.18969853, -0.16395922, 0.17028388, -0.3653791, -0.062085465, -0.27042845, -0.20845734, -0.22975564, 0.38791245, -0.21153997, 0.22299886, 0.4067098, -0.3180967, -0.119020626, 0.16592985, 0.08134824, 0.07111609, -0.11281331, 0.21532886, 0.17626029, -0.11157626, 0.24869683, 0.002167793, 0.25803974, 0.17146061, 0.11219464, 0.14177255, 0.13628139, -0.14174575, 0.040052604, 0.0049264827, -0.027609568, -0.25137493, -0.14084421, 0.20105876, -0.058221187, 0.030821402, -0.15669239, -0.097033136, 0.02064872, 0.40998495, -0.3793549, 0.26364204, 0.09185891, 0.13761812, -0.23594798, -0.21576473, 0.064160846, 0.18441695, -0.39402333, 0.011562835, 0.15811875, 0.10871025, 0.2278601, 0.26277956, -0.011325682, -0.12331526, 0.4965102, -0.15407956, -0.13325658, 0.27157038, -0.25075117, -0.27684662, 0.28200155, -0.010683825, 0.30480832, 0.13320027, 0.031010821, 0.07333867, -0.6036074, 0.06996156, -0.4761334, -0.0033244402, 0.03359116, -0.097072996, -0.19554745, 0.15043412, 0.29090378, -0.24856627, -0.02541361, 0.2121456, 0.08295338, -0.09810014, 0.4901113, 
-0.022632308, 0.22354102, -0.060473014, 0.26167145, -0.21305074, 0.25782862, -0.26308486, -0.09908113, 0.007932383, 0.08765336, 0.052684747, -0.05878163, -0.31929326, 0.21060982, -0.006360022, -0.035593312, -0.04580954, 0.092614815, -0.010202253, 0.053435646, 0.06810382, 0.33141363, 0.23865972, -0.022269802, -0.35638827, -0.017010234, -0.09842304, 0.044951804, 0.013101696, 0.012481737, 0.4473684, -0.07812174, 0.008848828, -0.10374172, 0.26849997, 0.19759308, 0.14334072, 0.12648976, 0.085015506, 0.16048867, -0.04428103, -0.02241106, -0.15643533, -0.2261526, -0.27816802, 0.21147077, -0.23083511, -0.15736212, 0.16844489, 0.21279429, -0.116731785, 0.13067369, 0.3122624, 0.136315, -0.16236319, 0.266095, -0.118092746, 0.08463656, 0.3190953, -0.025155172, 0.17036186, 0.517092, 0.21172866, -0.37978435, -0.033721708, -0.20605312, -0.010164018, 0.260588, -0.16144288, 0.1739176, 0.37816438, 0.31039205, 0.44743317, -0.006673559, -0.11675396, 0.07569114, 0.20170745, 0.012370929, -0.17593956, -0.16927823, 0.26412624, 0.05698451, -0.14810121, -0.023506388, -0.16487631, 0.026122056, -0.14697117, -0.4044809, 0.04526173, 0.21642078, -0.4945617, 0.10950964, -0.29737392, 0.06892894, -0.2156567, 0.22239617, -0.23322712, -0.10665675, 0.39332575, -0.09363847, 0.050910737, -0.19002058, -0.1574706, 0.02213912, 0.036516193, -0.028725138, -0.017847586, 0.3557758, -0.12832177, 0.048198838, 4.3433104E-4, 0.22483422, -0.061355013, 0.18990348, 0.03564027, -0.1394448, -0.39040518, 0.15794353, -0.20287846, -0.4367065, -0.37070453, 0.35814124, -0.14648235, -0.26215193, -0.2346811, -0.27164423, 0.06335842, 0.18181078, 0.4646271, -0.39600596, -0.079446204, 0.49517226, -0.04852778, -0.16844611, 0.2967518, 0.19760415, -0.31121883, 0.34088603, 0.25469545, -0.018662168, 0.02773218, 0.51845956, 0.13568775, 0.18219014, -0.23326635, 0.46436208, -0.21790144, 0.31559125, -0.17255287, -0.20793658, -0.20586798, -0.001172778, 0.3308969, 0.17670253, -0.42809558, -0.10740692, 0.036743138, 0.35039994, -0.37736034, 
-0.06748311, 0.0051405686, -0.35943413, 0.1323299, 0.076594, 0.23542015, -0.38947168, -0.0032625643, 0.40785658, -0.3197368, 0.13108508, 0.31222928, 0.10504441, 0.376466, -0.039943226, 0.0075853164, 0.062115036, -0.23973337, -0.015673233, 0.124596596, 0.567843, 0.13057941, -0.40122274, 0.10246062, 0.25044444, -0.19272071, 0.3008771, -0.085480474, -0.04598082, 0.27369395, -0.024367249, 0.16280788, -0.09723148, -0.2331273, -0.31018296, 0.3681214, -0.20254564, -0.099116005, -0.16499473, -0.111495964, -0.13236894, 0.04578884, -0.36830792, 0.33352196, 0.118905395, -0.20436546, -0.08910314, -0.07557359, -0.15557393, -0.234037, -0.2726709, 0.43529606, -0.1588916, -0.44991276, 0.2639855, 0.03949102, 0.3693944, 0.016801286, 0.10393568, -0.069996394, 0.15449597, 0.08037712, -0.13697414, 0.28319156, 0.066741705, -0.5410128, -0.14622404, -0.2038448, 0.08338539, 0.2011268, -0.33919087, 0.04361721, 0.02101723, 0.14619805, 0.030959083, -0.09712685, -0.07434212, 0.403413, 0.22790873, 0.28365055, 0.09378035, 0.23105437, -0.029275687, -0.3173693, 0.0543687, 0.07409911, -0.2017265, 0.45038718, -0.08160741, -0.39471105, -0.066249356, 0.41151386, 0.08760141, 0.007330824, -0.066278905, 0.21011162, 0.1539203, -0.12142708, 0.1757174, -0.026292767, -0.14986737, -0.113564014, 0.07391096, -0.21108653, 0.06545459, -0.16722327, -0.006594374, -0.20676054, 0.016034845, -0.20714003, 0.2670048, -0.31555256, 0.10177538, 0.06680942, 0.28953385, -0.34272602, -0.1623317, -0.076802626, 0.15219814, 0.26613286, 0.34149784, 0.032789387, 0.028899977, -0.18126866, -0.2511501, 0.081135035, -0.21000236, 0.16121005, 0.08403291, 0.24493581, -0.32109466, -0.20124927, 0.20610592, -0.10798646, -0.14663638, 0.40788773, 0.24154414, 0.21223918, 0.0129725, 0.26252237, 0.025581531, -0.17492732, -0.12604713, -0.24954422, 0.07021474, -0.078310095, -0.051400866, -0.035224427, -0.11225072, -0.1952861, -0.17365551, 0.14578226, 0.14051022, 0.022287847, -0.057631906, -0.0139911715, -0.27890974, 0.32389244, 0.02819324, 
0.05819198, -0.069384664, 0.049535263, -0.13024889, 0.23608364, 0.21746805, 0.077531695, -0.19169594, -0.06775553, -0.27078623, -0.35225046, 0.058470953, 0.12146965, 0.13006094, -0.06770603, -0.25335953, 0.007816828, -0.13747253, 0.17829442, 0.011401831, -0.16540085, -0.103149, -0.069404215, -0.03443399, 0.077324726, -0.19909632, -0.20789692, -0.10723613, -0.08203292, -0.07713148, 0.34724632, -0.06089041, 0.2798024, -0.14503583, -0.0020078092, -0.15062527, 0.13015121, -0.065569915, 0.056575716, 0.25992274, -0.46167943, -0.15914112, -0.026965503, -0.20004211, -0.141356, -0.077509604, -0.027742084, 0.22565441, -0.36088252, 0.2413167, -0.12192323, 0.18409002, -0.08263175, -0.25700292, -0.16171978, -0.0079028085, 0.24911962, -0.3509631, -0.25993952, -0.2676477, -0.08899839, -0.06546339, -0.26874635, 0.42236093, -0.12012787, -0.08681204, 0.0034873153, 0.4458711, 0.20714682, 0.17381635, 0.20979515, -0.0273073, 0.03752274, 0.11206485, -0.46285358, 0.22496869, -0.2342749, -0.12822811, 0.0051306086, 0.109438226, -0.02305657, 0.030262345, -0.12668712, -0.11175425, 0.21646728, -0.37218267, -0.06411932, 0.27182007, 0.16698217, -0.26499093, 0.042440798, 0.13600044, 0.37879592, 0.11507201, -0.2173002, 0.1320668, -0.32382184, -0.037356067, -0.20496605, -0.2866237, 0.16485146, -0.081692345, 0.06725929, -0.10196793, -0.28608373, 0.21656291, -0.064518094, -0.07937422, 0.42675838, 0.0076278546, -0.12415507, 0.14044033, 0.014213737, 0.030048521, -0.0996337, 0.2774318, 0.20182906, -0.2818618, 0.1473123, -0.1531179, -0.04014678, -0.10299498]}] |-612640902 |[(1274183715,0.12201215887654807),(-1719102856,0.2991777399965483),(-1548374770,0.31909423657258823),(-1320876223,0.32308714836804664),(1293373212,0.3656377678477694),(1634839239,0.37472233818858686),(1510101612,0.3851201869623605)]|1274183715 |0.12201215887654807 |\n", - "|Fourth document, climate change is definitely one of the most pressing problems of our time. 
|[{document, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {sentence -> 0}, []}] |[{document, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {sentence -> 0}, []}] |[{token, 0, 5, Fourth, {sentence -> 0}, []}, {token, 7, 14, document, {sentence -> 0}, []}, {token, 15, 15, ,, {sentence -> 0}, []}, {token, 17, 23, climate, {sentence -> 0}, []}, {token, 25, 30, change, {sentence -> 0}, []}, {token, 32, 33, is, {sentence -> 0}, []}, {token, 35, 44, definitely, {sentence -> 0}, []}, {token, 46, 48, one, {sentence -> 0}, []}, {token, 50, 51, of, {sentence -> 0}, []}, {token, 53, 55, the, {sentence -> 0}, []}, {token, 57, 60, most, {sentence -> 0}, []}, {token, 62, 69, pressing, {sentence -> 0}, []}, {token, 71, 78, problems, {sentence -> 0}, []}, {token, 80, 81, of, {sentence -> 0}, []}, {token, 83, 85, our, {sentence -> 0}, []}, {token, 87, 90, time, {sentence -> 0}, []}, {token, 91, 91, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {sentence -> 0, token -> Fourth document, climate change is definitely one of the most pressing problems of our time., pieceId -> -1, isWordStart -> true}, [-0.003835852, -0.20862408, -0.21773167, -0.061703123, 0.14652315, 0.19786398, 0.26306406, -0.078452975, -0.08771903, -0.17226249, 0.22881703, 0.0019276487, -0.097814694, 0.09299635, -0.122063614, 0.4991608, 0.21783605, -0.4668241, 0.034899868, -0.032340027, -0.25079423, 0.06777912, 0.46978673, 0.32557002, 0.10782054, 0.074416175, -0.1351248, -0.043467775, 0.18473676, 0.24320771, 0.29688674, 0.063793264, 0.1008339, 0.23997188, -0.23376757, 0.06799241, -0.3010938, 0.016623985, 0.2497724, -0.21152967, -0.07613329, 0.165055, 0.21546067, -0.16450602, -0.104793414, 0.40502572, 0.22857304, 0.026289828, -0.12639655, -0.09120597, -0.36880243, 0.34113964, 0.29069296, 0.18020727, 
0.014091524, 0.015363324, -0.16181347, 0.23874591, -0.08743463, -0.100837186, -0.11651775, -0.21363984, -0.0059150266, -0.053012766, 0.025422096, -0.14887391, 0.099708736, -0.15484057, -0.13415912, 0.06851785, -0.0805267, 0.1470088, 0.16662681, -0.29883987, -0.26368806, 0.047838878, -0.5956105, -0.12314607, 0.28308532, 0.4328412, -0.114371665, 0.19504122, 0.052935146, 0.22930811, -0.01836126, -0.058793154, -0.05645759, -0.10266738, 0.17061166, 0.28478023, -0.20342122, -0.391638, 0.04970928, 0.026398072, -0.08933313, 0.0040505435, -0.010148193, -0.072514966, -0.17795344, -0.204243, 0.083572954, -0.27162114, -0.14674117, 0.25768456, -0.031521432, -0.2069687, -0.019544436, 0.30430898, 0.0742721, -0.094543956, -0.17090842, 0.41898516, 0.33163518, 1.12846494E-4, 0.028253844, 0.16404548, 0.123001106, -0.2959598, 0.41834822, -0.30666676, -0.018280396, -0.10533799, 0.13000785, 0.15551804, -0.2095328, 0.28947356, 0.1302748, 0.26099348, 0.19238193, 0.095461756, -0.034195073, 0.15064953, -0.11497837, 0.14000307, 0.21017769, 0.12219769, -0.007975813, -0.34788206, -0.2179525, 0.26906368, 0.35087898, 0.16659309, -0.048696965, 0.19371018, 0.10114278, 0.19875155, 0.14317547, -0.39720836, 0.045373674, 0.3482762, 0.11755119, 0.15139255, -0.07947107, -0.27557716, -0.25579336, -0.07741892, 0.05452082, -0.3229889, -0.1349494, 0.38116154, 0.035242334, -9.4695255E-4, -0.15345678, -0.20902577, -0.028087988, -0.12072554, 0.02527599, 0.094951235, -0.08846783, -0.4103613, -0.11496751, -0.5556833, -0.12119456, 0.19222125, -0.29915833, 0.24936001, -0.2900803, 0.09389836, 0.39451364, 0.052619856, -0.003266785, -0.1926063, -0.025942078, 0.13278645, 0.32179895, 0.24352162, -0.3960295, 0.09980806, 0.17284764, 0.25954893, 0.14217232, -0.04101023, -0.13483965, 0.13977908, -0.18719734, 0.18095459, -0.21917875, 0.1663283, -0.26652965, -0.21226367, 0.27464914, -0.40665168, -0.026470223, 0.07333406, 0.26849043, 7.350019E-4, -0.056252368, -0.08676217, 0.09196398, 0.18072756, 0.14173715, -0.399947, 
0.27917507, -0.026699087, -0.034646686, -0.0265695, 0.19165027, 0.25528798, 0.10507765, -0.37910807, -0.14147788, 0.1213717, 0.28017974, -0.2178526, 0.17398752, -0.29096332, -0.3853184, -0.13676582, 0.20780607, 0.21252154, 0.16414383, -0.2619306, 0.18530105, -0.099988185, -0.437557, -0.36251107, -0.11280457, 0.2483923, 0.18430209, 0.19254051, 0.25591743, 0.044537775, 0.11579968, 0.16134737, 0.18732512, -0.1632906, 0.16494152, -0.36760136, -0.063819736, -0.26302108, -0.2046593, -0.2226854, 0.39319545, -0.20637247, 0.2204216, 0.39861265, -0.3154902, -0.110166, 0.15821502, 0.08322516, 0.07195477, -0.11205203, 0.21377234, 0.1727777, -0.1082272, 0.24539624, 0.0072368, 0.255411, 0.18038207, 0.113827385, 0.13634266, 0.13769454, -0.14098604, 0.029769506, 1.07247615E-4, -0.027630894, -0.24913825, -0.14263271, 0.1956553, -0.054647956, 0.025447316, -0.16555777, -0.09804624, 0.021545487, 0.4063341, -0.37619784, 0.25973955, 0.0909772, 0.1343971, -0.2340182, -0.21328466, 0.06330135, 0.17917435, -0.39460728, 0.012819739, 0.15837549, 0.10605624, 0.21922973, 0.25763094, -0.011688642, -0.1233857, 0.49162617, -0.15246215, -0.12871982, 0.2749578, -0.24912183, -0.27655655, 0.27614963, -0.015543518, 0.30740818, 0.12660035, 0.034775224, 0.07287423, -0.6020512, 0.0732433, -0.48010373, -0.0063999984, 0.02299516, -0.0973719, -0.194734, 0.15007432, 0.2863854, -0.2561826, -0.02701807, 0.21356896, 0.08775118, -0.1012267, 0.48577482, -0.0175645, 0.22016966, -0.062453557, 0.25920734, -0.2079024, 0.25610057, -0.2586762, -0.10540434, 0.0039830296, 0.08733931, 0.050882008, -0.052359536, -0.31297603, 0.2106775, -0.0026711975, -0.04521307, -0.04683987, 0.090368256, -0.012594639, 0.06028535, 0.06301103, 0.33172685, 0.22572477, -0.019447142, -0.35718563, -0.00800849, -0.097327664, 0.042126693, 0.013663563, 0.012726291, 0.44344866, -0.08289011, 0.007116606, -0.11163828, 0.26574427, 0.18960999, 0.1414628, 0.128762, 0.078481935, 0.157456, -0.05116457, -0.026563864, -0.15368989, -0.21608125, -0.27050465, 
0.21623424, -0.23023556, -0.16277516, 0.16495888, 0.21445934, -0.111836694, 0.13322864, 0.30657476, 0.13720067, -0.159983, 0.26702243, -0.12482961, 0.09016377, 0.32040662, -0.018563824, 0.16497932, 0.5124235, 0.20532905, -0.3814022, -0.03317346, -0.20309982, -0.0040927846, 0.2560035, -0.1638267, 0.1806096, 0.38001326, 0.30242908, 0.44804883, -0.0075148335, -0.12329148, 0.08455233, 0.19882831, 0.008244825, -0.17403102, -0.17037638, 0.26745683, 0.059498128, -0.14675348, -0.02704845, -0.15730269, 0.027009208, -0.14835586, -0.3984341, 0.04951467, 0.21336877, -0.49393272, 0.10631875, -0.29439995, 0.061264765, -0.21304032, 0.21745941, -0.23458827, -0.10586627, 0.39240077, -0.09479013, 0.053299956, -0.1869098, -0.15625404, 0.020766083, 0.037299443, -0.023929408, -0.020717165, 0.35108542, -0.12770072, 0.048128624, -0.0012627487, 0.21702303, -0.059279095, 0.19388442, 0.03769969, -0.14153397, -0.38913018, 0.13990252, -0.21227513, -0.433339, -0.3663871, 0.36315858, -0.14353605, -0.26056486, -0.22842707, -0.26850614, 0.06737485, 0.18009762, 0.46545935, -0.3985722, -0.077369705, 0.49015, -0.053387765, -0.16854678, 0.29057407, 0.1913427, -0.3114105, 0.33391303, 0.2558109, -0.023194117, 0.018415831, 0.51090944, 0.13048178, 0.17275383, -0.22784911, 0.46020842, -0.21511106, 0.31633744, -0.17009853, -0.20447387, -0.209933, -0.004602926, 0.32671034, 0.17847407, -0.42724732, -0.10567779, 0.038091425, 0.34635022, -0.36916775, -0.06940825, 0.0031484652, -0.35126984, 0.12976545, 0.07222441, 0.22948559, -0.38525584, -0.0054675937, 0.4029358, -0.3128454, 0.13479385, 0.30039707, 0.10271079, 0.37128493, -0.039665103, 0.0044320114, 0.05975092, -0.23670478, -0.014700713, 0.12472548, 0.56054676, 0.12695724, -0.39929366, 0.09567339, 0.24853627, -0.18731521, 0.29644012, -0.08412853, -0.047205262, 0.2728614, -0.02490702, 0.15606382, -0.097337656, -0.22999285, -0.31338483, 0.36374938, -0.19818754, -0.099194676, -0.16242108, -0.11241214, -0.13800095, 0.04759648, -0.36923477, 0.3377149, 0.119674034, 
-0.19736561, -0.08515725, -0.08039907, -0.14816573, -0.23759411, -0.27405044, 0.4255974, -0.16980892, -0.45103624, 0.2592508, 0.041907087, 0.36110488, 0.024747934, 0.10244622, -0.059871107, 0.15316144, 0.0838079, -0.13005643, 0.28822184, 0.0665008, -0.5368563, -0.15151228, -0.2019773, 0.08225291, 0.192799, -0.33645836, 0.038935043, 0.018619873, 0.14761768, 0.025234457, -0.09481421, -0.075215675, 0.4102618, 0.2266266, 0.2828633, 0.09196845, 0.23114052, -0.027229344, -0.32195765, 0.05286099, 0.07850093, -0.20198047, 0.44307065, -0.0828929, -0.3885311, -0.0633556, 0.39846793, 0.08895412, 0.0034600352, -0.05967322, 0.20862308, 0.15983063, -0.12449649, 0.17403392, -0.028444685, -0.14034821, -0.11273413, 0.074709274, -0.21595575, 0.06382918, -0.15581375, -0.0037954638, -0.2143301, 0.02041007, -0.20123285, 0.2582673, -0.31853202, 0.1053178, 0.06505883, 0.2866883, -0.34562057, -0.1629475, -0.074389875, 0.15515953, 0.26866287, 0.3449197, 0.031800583, 0.023580268, -0.1726088, -0.25076672, 0.081279226, -0.20510998, 0.1600836, 0.085919306, 0.24515575, -0.31630135, -0.19066125, 0.20611435, -0.09954545, -0.14281541, 0.4028987, 0.24429737, 0.21244703, 0.019491952, 0.2653371, 0.026023164, -0.17607515, -0.121807344, -0.24455278, 0.06749151, -0.074221194, -0.04900035, -0.03342888, -0.115426466, -0.18984312, -0.17625487, 0.1499366, 0.1351458, 0.017796097, -0.05602583, -0.017981825, -0.27410272, 0.31989276, 0.031326815, 0.061995383, -0.065489784, 0.04399158, -0.13519874, 0.23140492, 0.21671903, 0.09083854, -0.18631245, -0.057688475, -0.27705774, -0.35227805, 0.056066796, 0.1273246, 0.12778823, -0.07574217, -0.24935283, 0.008509332, -0.13331816, 0.17538325, 0.009673233, -0.16985297, -0.1068587, -0.06615289, -0.03307501, 0.07584848, -0.19225228, -0.20574294, -0.11182022, -0.08309786, -0.08347798, 0.35421437, -0.05453685, 0.27538902, -0.14582933, -0.0071586724, -0.1518315, 0.12785475, -0.065459505, 0.063663825, 0.24955855, -0.4544611, -0.16337994, -0.029619344, -0.1973021, -0.14364167, 
-0.079227276, -0.02981196, 0.2262998, -0.3568656, 0.2402911, -0.120060846, 0.19488864, -0.07728642, -0.2543439, -0.15293853, -0.009106492, 0.24851386, -0.34600765, -0.25706416, -0.2659431, -0.08471125, -0.059878696, -0.27150732, 0.42241326, -0.11469594, -0.08416896, 0.0047065616, 0.45044258, 0.20738047, 0.17053586, 0.20957811, -0.029988201, 0.040661167, 0.10623518, -0.4656837, 0.22994503, -0.23307306, -0.11735744, 0.0035948404, 0.10942622, -0.021296069, 0.022905214, -0.124894835, -0.10581059, 0.21904148, -0.3748968, -0.0631806, 0.271241, 0.1652423, -0.2599846, 0.038464893, 0.13197884, 0.38681895, 0.11253837, -0.21754825, 0.13165388, -0.32750425, -0.028989421, -0.20510201, -0.28893355, 0.15721151, -0.08457131, 0.070540816, -0.09493317, -0.28037602, 0.20949727, -0.066243395, -0.08535814, 0.42199194, 0.0021733278, -0.11638748, 0.13930771, 0.008724968, 0.032127243, -0.09387868, 0.2737223, 0.20442082, -0.2744895, 0.14772631, -0.154034, -0.040445644, -0.10262055]}] |[{doc_similarity_rankings, 0, 91, Fourth document, climate change is definitely one of the most pressing problems of our time., {pieceId -> -1, lshId -> 1274183715, isWordStart -> true, token -> Fourth document, climate change is definitely one of the most pressing problems of our time., lshNeighbors -> [(-612640902,0.12201215887654807),(-1719102856,0.28519768414650126),(-1320876223,0.3148226377437515),(-1548374770,0.3181712969830674),(1293373212,0.34962777859773414),(1634839239,0.3554576544360366),(1510101612,0.36788497133113396)], sentence -> 0}, [-0.003835852, -0.20862408, -0.21773167, -0.061703123, 0.14652315, 0.19786398, 0.26306406, -0.078452975, -0.08771903, -0.17226249, 0.22881703, 0.0019276487, -0.097814694, 0.09299635, -0.122063614, 0.4991608, 0.21783605, -0.4668241, 0.034899868, -0.032340027, -0.25079423, 0.06777912, 0.46978673, 0.32557002, 0.10782054, 0.074416175, -0.1351248, -0.043467775, 0.18473676, 0.24320771, 0.29688674, 0.063793264, 0.1008339, 0.23997188, -0.23376757, 0.06799241, -0.3010938, 
0.016623985, 0.2497724, -0.21152967, -0.07613329, 0.165055, 0.21546067, -0.16450602, -0.104793414, 0.40502572, 0.22857304, 0.026289828, -0.12639655, -0.09120597, -0.36880243, 0.34113964, 0.29069296, 0.18020727, 0.014091524, 0.015363324, -0.16181347, 0.23874591, -0.08743463, -0.100837186, -0.11651775, -0.21363984, -0.0059150266, -0.053012766, 0.025422096, -0.14887391, 0.099708736, -0.15484057, -0.13415912, 0.06851785, -0.0805267, 0.1470088, 0.16662681, -0.29883987, -0.26368806, 0.047838878, -0.5956105, -0.12314607, 0.28308532, 0.4328412, -0.114371665, 0.19504122, 0.052935146, 0.22930811, -0.01836126, -0.058793154, -0.05645759, -0.10266738, 0.17061166, 0.28478023, -0.20342122, -0.391638, 0.04970928, 0.026398072, -0.08933313, 0.0040505435, -0.010148193, -0.072514966, -0.17795344, -0.204243, 0.083572954, -0.27162114, -0.14674117, 0.25768456, -0.031521432, -0.2069687, -0.019544436, 0.30430898, 0.0742721, -0.094543956, -0.17090842, 0.41898516, 0.33163518, 1.12846494E-4, 0.028253844, 0.16404548, 0.123001106, -0.2959598, 0.41834822, -0.30666676, -0.018280396, -0.10533799, 0.13000785, 0.15551804, -0.2095328, 0.28947356, 0.1302748, 0.26099348, 0.19238193, 0.095461756, -0.034195073, 0.15064953, -0.11497837, 0.14000307, 0.21017769, 0.12219769, -0.007975813, -0.34788206, -0.2179525, 0.26906368, 0.35087898, 0.16659309, -0.048696965, 0.19371018, 0.10114278, 0.19875155, 0.14317547, -0.39720836, 0.045373674, 0.3482762, 0.11755119, 0.15139255, -0.07947107, -0.27557716, -0.25579336, -0.07741892, 0.05452082, -0.3229889, -0.1349494, 0.38116154, 0.035242334, -9.4695255E-4, -0.15345678, -0.20902577, -0.028087988, -0.12072554, 0.02527599, 0.094951235, -0.08846783, -0.4103613, -0.11496751, -0.5556833, -0.12119456, 0.19222125, -0.29915833, 0.24936001, -0.2900803, 0.09389836, 0.39451364, 0.052619856, -0.003266785, -0.1926063, -0.025942078, 0.13278645, 0.32179895, 0.24352162, -0.3960295, 0.09980806, 0.17284764, 0.25954893, 0.14217232, -0.04101023, -0.13483965, 0.13977908, -0.18719734, 
0.18095459, -0.21917875, 0.1663283, -0.26652965, -0.21226367, 0.27464914, -0.40665168, -0.026470223, 0.07333406, 0.26849043, 7.350019E-4, -0.056252368, -0.08676217, 0.09196398, 0.18072756, 0.14173715, -0.399947, 0.27917507, -0.026699087, -0.034646686, -0.0265695, 0.19165027, 0.25528798, 0.10507765, -0.37910807, -0.14147788, 0.1213717, 0.28017974, -0.2178526, 0.17398752, -0.29096332, -0.3853184, -0.13676582, 0.20780607, 0.21252154, 0.16414383, -0.2619306, 0.18530105, -0.099988185, -0.437557, -0.36251107, -0.11280457, 0.2483923, 0.18430209, 0.19254051, 0.25591743, 0.044537775, 0.11579968, 0.16134737, 0.18732512, -0.1632906, 0.16494152, -0.36760136, -0.063819736, -0.26302108, -0.2046593, -0.2226854, 0.39319545, -0.20637247, 0.2204216, 0.39861265, -0.3154902, -0.110166, 0.15821502, 0.08322516, 0.07195477, -0.11205203, 0.21377234, 0.1727777, -0.1082272, 0.24539624, 0.0072368, 0.255411, 0.18038207, 0.113827385, 0.13634266, 0.13769454, -0.14098604, 0.029769506, 1.07247615E-4, -0.027630894, -0.24913825, -0.14263271, 0.1956553, -0.054647956, 0.025447316, -0.16555777, -0.09804624, 0.021545487, 0.4063341, -0.37619784, 0.25973955, 0.0909772, 0.1343971, -0.2340182, -0.21328466, 0.06330135, 0.17917435, -0.39460728, 0.012819739, 0.15837549, 0.10605624, 0.21922973, 0.25763094, -0.011688642, -0.1233857, 0.49162617, -0.15246215, -0.12871982, 0.2749578, -0.24912183, -0.27655655, 0.27614963, -0.015543518, 0.30740818, 0.12660035, 0.034775224, 0.07287423, -0.6020512, 0.0732433, -0.48010373, -0.0063999984, 0.02299516, -0.0973719, -0.194734, 0.15007432, 0.2863854, -0.2561826, -0.02701807, 0.21356896, 0.08775118, -0.1012267, 0.48577482, -0.0175645, 0.22016966, -0.062453557, 0.25920734, -0.2079024, 0.25610057, -0.2586762, -0.10540434, 0.0039830296, 0.08733931, 0.050882008, -0.052359536, -0.31297603, 0.2106775, -0.0026711975, -0.04521307, -0.04683987, 0.090368256, -0.012594639, 0.06028535, 0.06301103, 0.33172685, 0.22572477, -0.019447142, -0.35718563, -0.00800849, -0.097327664, 0.042126693, 
0.013663563, 0.012726291, 0.44344866, -0.08289011, 0.007116606, -0.11163828, 0.26574427, 0.18960999, 0.1414628, 0.128762, 0.078481935, 0.157456, -0.05116457, -0.026563864, -0.15368989, -0.21608125, -0.27050465, 0.21623424, -0.23023556, -0.16277516, 0.16495888, 0.21445934, -0.111836694, 0.13322864, 0.30657476, 0.13720067, -0.159983, 0.26702243, -0.12482961, 0.09016377, 0.32040662, -0.018563824, 0.16497932, 0.5124235, 0.20532905, -0.3814022, -0.03317346, -0.20309982, -0.0040927846, 0.2560035, -0.1638267, 0.1806096, 0.38001326, 0.30242908, 0.44804883, -0.0075148335, -0.12329148, 0.08455233, 0.19882831, 0.008244825, -0.17403102, -0.17037638, 0.26745683, 0.059498128, -0.14675348, -0.02704845, -0.15730269, 0.027009208, -0.14835586, -0.3984341, 0.04951467, 0.21336877, -0.49393272, 0.10631875, -0.29439995, 0.061264765, -0.21304032, 0.21745941, -0.23458827, -0.10586627, 0.39240077, -0.09479013, 0.053299956, -0.1869098, -0.15625404, 0.020766083, 0.037299443, -0.023929408, -0.020717165, 0.35108542, -0.12770072, 0.048128624, -0.0012627487, 0.21702303, -0.059279095, 0.19388442, 0.03769969, -0.14153397, -0.38913018, 0.13990252, -0.21227513, -0.433339, -0.3663871, 0.36315858, -0.14353605, -0.26056486, -0.22842707, -0.26850614, 0.06737485, 0.18009762, 0.46545935, -0.3985722, -0.077369705, 0.49015, -0.053387765, -0.16854678, 0.29057407, 0.1913427, -0.3114105, 0.33391303, 0.2558109, -0.023194117, 0.018415831, 0.51090944, 0.13048178, 0.17275383, -0.22784911, 0.46020842, -0.21511106, 0.31633744, -0.17009853, -0.20447387, -0.209933, -0.004602926, 0.32671034, 0.17847407, -0.42724732, -0.10567779, 0.038091425, 0.34635022, -0.36916775, -0.06940825, 0.0031484652, -0.35126984, 0.12976545, 0.07222441, 0.22948559, -0.38525584, -0.0054675937, 0.4029358, -0.3128454, 0.13479385, 0.30039707, 0.10271079, 0.37128493, -0.039665103, 0.0044320114, 0.05975092, -0.23670478, -0.014700713, 0.12472548, 0.56054676, 0.12695724, -0.39929366, 0.09567339, 0.24853627, -0.18731521, 0.29644012, -0.08412853, 
-0.047205262, 0.2728614, -0.02490702, 0.15606382, -0.097337656, -0.22999285, -0.31338483, 0.36374938, -0.19818754, -0.099194676, -0.16242108, -0.11241214, -0.13800095, 0.04759648, -0.36923477, 0.3377149, 0.119674034, -0.19736561, -0.08515725, -0.08039907, -0.14816573, -0.23759411, -0.27405044, 0.4255974, -0.16980892, -0.45103624, 0.2592508, 0.041907087, 0.36110488, 0.024747934, 0.10244622, -0.059871107, 0.15316144, 0.0838079, -0.13005643, 0.28822184, 0.0665008, -0.5368563, -0.15151228, -0.2019773, 0.08225291, 0.192799, -0.33645836, 0.038935043, 0.018619873, 0.14761768, 0.025234457, -0.09481421, -0.075215675, 0.4102618, 0.2266266, 0.2828633, 0.09196845, 0.23114052, -0.027229344, -0.32195765, 0.05286099, 0.07850093, -0.20198047, 0.44307065, -0.0828929, -0.3885311, -0.0633556, 0.39846793, 0.08895412, 0.0034600352, -0.05967322, 0.20862308, 0.15983063, -0.12449649, 0.17403392, -0.028444685, -0.14034821, -0.11273413, 0.074709274, -0.21595575, 0.06382918, -0.15581375, -0.0037954638, -0.2143301, 0.02041007, -0.20123285, 0.2582673, -0.31853202, 0.1053178, 0.06505883, 0.2866883, -0.34562057, -0.1629475, -0.074389875, 0.15515953, 0.26866287, 0.3449197, 0.031800583, 0.023580268, -0.1726088, -0.25076672, 0.081279226, -0.20510998, 0.1600836, 0.085919306, 0.24515575, -0.31630135, -0.19066125, 0.20611435, -0.09954545, -0.14281541, 0.4028987, 0.24429737, 0.21244703, 0.019491952, 0.2653371, 0.026023164, -0.17607515, -0.121807344, -0.24455278, 0.06749151, -0.074221194, -0.04900035, -0.03342888, -0.115426466, -0.18984312, -0.17625487, 0.1499366, 0.1351458, 0.017796097, -0.05602583, -0.017981825, -0.27410272, 0.31989276, 0.031326815, 0.061995383, -0.065489784, 0.04399158, -0.13519874, 0.23140492, 0.21671903, 0.09083854, -0.18631245, -0.057688475, -0.27705774, -0.35227805, 0.056066796, 0.1273246, 0.12778823, -0.07574217, -0.24935283, 0.008509332, -0.13331816, 0.17538325, 0.009673233, -0.16985297, -0.1068587, -0.06615289, -0.03307501, 0.07584848, -0.19225228, -0.20574294, -0.11182022, 
-0.08309786, -0.08347798, 0.35421437, -0.05453685, 0.27538902, -0.14582933, -0.0071586724, -0.1518315, 0.12785475, -0.065459505, 0.063663825, 0.24955855, -0.4544611, -0.16337994, -0.029619344, -0.1973021, -0.14364167, -0.079227276, -0.02981196, 0.2262998, -0.3568656, 0.2402911, -0.120060846, 0.19488864, -0.07728642, -0.2543439, -0.15293853, -0.009106492, 0.24851386, -0.34600765, -0.25706416, -0.2659431, -0.08471125, -0.059878696, -0.27150732, 0.42241326, -0.11469594, -0.08416896, 0.0047065616, 0.45044258, 0.20738047, 0.17053586, 0.20957811, -0.029988201, 0.040661167, 0.10623518, -0.4656837, 0.22994503, -0.23307306, -0.11735744, 0.0035948404, 0.10942622, -0.021296069, 0.022905214, -0.124894835, -0.10581059, 0.21904148, -0.3748968, -0.0631806, 0.271241, 0.1652423, -0.2599846, 0.038464893, 0.13197884, 0.38681895, 0.11253837, -0.21754825, 0.13165388, -0.32750425, -0.028989421, -0.20510201, -0.28893355, 0.15721151, -0.08457131, 0.070540816, -0.09493317, -0.28037602, 0.20949727, -0.066243395, -0.08535814, 0.42199194, 0.0021733278, -0.11638748, 0.13930771, 0.008724968, 0.032127243, -0.09387868, 0.2737223, 0.20442082, -0.2744895, 0.14772631, -0.154034, -0.040445644, -0.10262055]}] |1274183715 |[(-612640902,0.12201215887654807),(-1719102856,0.28519768414650126),(-1320876223,0.3148226377437515),(-1548374770,0.3181712969830674),(1293373212,0.34962777859773414),(1634839239,0.3554576544360366),(1510101612,0.36788497133113396)]|-612640902 |0.12201215887654807 |\n", - "|Fifth document, Florence in Italy, is among the most beautiful cities in Europe. 
|[{document, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {sentence -> 0}, []}] |[{document, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {sentence -> 0}, []}] |[{token, 0, 4, Fifth, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 23, Florence, {sentence -> 0}, []}, {token, 25, 26, in, {sentence -> 0}, []}, {token, 28, 32, Italy, {sentence -> 0}, []}, {token, 33, 33, ,, {sentence -> 0}, []}, {token, 35, 36, is, {sentence -> 0}, []}, {token, 38, 42, among, {sentence -> 0}, []}, {token, 44, 46, the, {sentence -> 0}, []}, {token, 48, 51, most, {sentence -> 0}, []}, {token, 53, 61, beautiful, {sentence -> 0}, []}, {token, 63, 68, cities, {sentence -> 0}, []}, {token, 70, 71, in, {sentence -> 0}, []}, {token, 73, 78, Europe, {sentence -> 0}, []}, {token, 79, 79, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {sentence -> 0, token -> Fifth document, Florence in Italy, is among the most beautiful cities in Europe., pieceId -> -1, isWordStart -> true}, [-0.017528567, -0.20887347, -0.21519172, -0.03888638, 0.13357492, 0.18307811, 0.25547925, -0.08337439, -0.093116276, -0.16162084, 0.23401257, -0.013713108, -0.12097579, 0.09835637, -0.13103157, 0.49466628, 0.2324918, -0.45448515, 0.019689137, -0.04135379, -0.2731323, 0.04852046, 0.45949715, 0.31224954, 0.09636614, 0.07835619, -0.14175831, -0.027474394, 0.19975916, 0.24123047, 0.29196763, 0.067277946, 0.11105793, 0.25373426, -0.24394739, 0.06752376, -0.3060502, 9.322982E-4, 0.23657267, -0.20978928, -0.07832431, 0.17510965, 0.22120374, -0.17409658, -0.10543208, 0.41644603, 0.2284994, 0.019994497, -0.1206437, -0.10357072, -0.36405995, 0.34403795, 0.2979404, 0.18125014, 0.018748851, 0.04012803, -0.1733186, 0.24439529, -0.08001437, -0.11709171, -0.12347519, -0.21569543, 
-0.026534403, -0.050762545, 0.008339827, -0.14291511, 0.09049857, -0.16389105, -0.14423226, 0.061702, -0.095125, 0.12417953, 0.16685584, -0.3054142, -0.28169885, 0.050286457, -0.6093201, -0.122661844, 0.2895588, 0.4279639, -0.13902375, 0.21143883, 0.051323365, 0.23632784, -0.01041111, -0.04957247, -0.055871606, -0.106259845, 0.18611698, 0.28124726, -0.2007103, -0.38195023, 0.06384789, 0.01629718, -0.0945481, -0.010838091, -0.04080201, -0.054464836, -0.17945004, -0.21212989, 0.07180114, -0.27967128, -0.1390803, 0.2592707, -0.03620937, -0.19366772, -0.026854295, 0.29872236, 0.07377065, -0.09859825, -0.16164072, 0.43511766, 0.33360565, 0.00644084, 0.021297934, 0.17641163, 0.109603375, -0.2808542, 0.4100023, -0.30830663, -0.02324736, -0.12787724, 0.13755724, 0.14575775, -0.21397276, 0.27359906, 0.1360874, 0.2805497, 0.18917772, 0.082572766, -0.03715889, 0.14764093, -0.116034165, 0.14106748, 0.21679801, 0.12502475, -0.017175207, -0.351808, -0.21835403, 0.26628754, 0.364595, 0.17638803, -0.041836392, 0.18179081, 0.108427785, 0.22434832, 0.15892959, -0.39827198, 0.044703104, 0.3359178, 0.13377586, 0.15026097, -0.08827668, -0.26545244, -0.24913852, -0.08579704, 0.05721378, -0.31592715, -0.14231163, 0.38532966, 0.036052644, -0.0027677694, -0.15842925, -0.21617033, -0.03264196, -0.13026895, 0.05327642, 0.08736663, -0.09196017, -0.41138726, -0.090421245, -0.5568038, -0.117088675, 0.1952759, -0.2986329, 0.24587378, -0.28948125, 0.083854966, 0.3666834, 0.025861917, -0.0017132398, -0.18599303, -0.03876378, 0.12794247, 0.3335259, 0.22762723, -0.39386296, 0.08550472, 0.14752512, 0.2544776, 0.13983288, -0.030865967, -0.13050406, 0.124644294, -0.19920404, 0.18511117, -0.20913021, 0.18329288, -0.26674783, -0.20118403, 0.2890029, -0.40576503, -0.040320467, 0.09285337, 0.2520434, 0.0034789476, -0.042712186, -0.07351959, 0.08958212, 0.18637295, 0.15107888, -0.3961306, 0.2681504, -0.021723367, -0.024320226, -0.031113315, 0.19820504, 0.26775804, 0.13101543, -0.39684162, -0.1377801, 
0.104406044, 0.28336167, -0.22840387, 0.16368443, -0.29106283, -0.38266584, -0.13229474, 0.20744194, 0.22572315, 0.16587397, -0.27363026, 0.1919627, -0.088201866, -0.42006725, -0.37401968, -0.11527952, 0.23918867, 0.19769923, 0.17820881, 0.2648834, 0.011358881, 0.12344342, 0.17440955, 0.1688173, -0.15462974, 0.18139134, -0.37719452, -0.07280123, -0.26092157, -0.19534999, -0.23901841, 0.3925651, -0.2268674, 0.2323894, 0.38537943, -0.32429454, -0.094378285, 0.16097061, 0.084330045, 0.07674305, -0.100616775, 0.20368801, 0.18317987, -0.10215017, 0.25102583, 0.013015796, 0.2618376, 0.19005089, 0.10934586, 0.121803135, 0.14295664, -0.13193665, 0.0531183, 0.017180707, -0.027863244, -0.26400146, -0.1470274, 0.21853186, -0.02715699, 0.027724234, -0.1564268, -0.102942735, 0.016841251, 0.40123186, -0.37726215, 0.26930076, 0.08578669, 0.1325882, -0.24705328, -0.21208054, 0.068617605, 0.18242663, -0.39056832, -0.01681968, 0.1632192, 0.10309466, 0.21451901, 0.25458825, 0.0111891525, -0.11184347, 0.4824411, -0.15262514, -0.12453984, 0.26980564, -0.24968751, -0.2749819, 0.27711532, -0.024115924, 0.30004704, 0.11883662, 0.026279865, 0.08240666, -0.6025861, 0.07997487, -0.46908012, 0.00318713, 0.017263228, -0.08420625, -0.20542656, 0.15257843, 0.28043687, -0.2576107, -0.029525554, 0.22024228, 0.08719829, -0.10169949, 0.47289637, -0.0070210746, 0.21146531, -0.09518342, 0.25460428, -0.22287658, 0.27640212, -0.2639986, -0.09115377, 0.012192329, 0.11418934, 0.042151, -0.062401574, -0.3224692, 0.21616587, -0.009924554, -0.03401868, -0.06122483, 0.09629407, 0.0010092515, 0.04388869, 0.06199242, 0.31550166, 0.21101683, -0.029377753, -0.36951146, -0.0112636825, -0.102699466, 0.035519373, 0.015170618, -0.014788982, 0.4407807, -0.09595226, 0.018110882, -0.13322428, 0.25438005, 0.20591274, 0.13240317, 0.1370632, 0.07530696, 0.15535797, -0.054800827, -0.00992456, -0.15799199, -0.22623523, -0.27510443, 0.21811266, -0.22423144, -0.1767162, 0.18349846, 0.22506163, -0.13641146, 0.12888514, 
0.32516634, 0.116478354, -0.14479439, 0.2633992, -0.101380765, 0.08838486, 0.31893408, -0.022981562, 0.15893175, 0.51558924, 0.21190989, -0.36944315, -0.048417248, -0.22201133, -0.013516225, 0.26638502, -0.15765886, 0.1851696, 0.38429335, 0.30839187, 0.44408417, 0.016373433, -0.111148976, 0.09977997, 0.21016383, 0.018669449, -0.16498841, -0.15592425, 0.26489583, 0.06269013, -0.15986864, -0.03969585, -0.13937603, 0.031850375, -0.13655634, -0.40000895, 0.05436843, 0.21910602, -0.4996334, 0.086884744, -0.29076114, 0.05204509, -0.22826457, 0.20508406, -0.2507804, -0.10823102, 0.38743898, -0.100984976, 0.058472827, -0.16382112, -0.16156591, 0.023903372, 0.033624973, -0.023788728, -0.018010495, 0.3690815, -0.09954051, 0.052623548, 0.004742802, 0.19814914, -0.06711465, 0.17929058, 0.03160398, -0.1334006, -0.36719155, 0.13863695, -0.22021416, -0.4222436, -0.35124636, 0.36573943, -0.14639676, -0.26974335, -0.21589987, -0.2655588, 0.06684742, 0.17689691, 0.44980702, -0.4014982, -0.09032181, 0.48900092, -0.07084343, -0.1657361, 0.29341906, 0.21496047, -0.3124336, 0.3256341, 0.26760855, -0.04306482, 0.025937881, 0.5136517, 0.13405447, 0.18719737, -0.22120632, 0.4532139, -0.22904672, 0.32023016, -0.17113322, -0.214062, -0.19590136, -0.004523295, 0.3206988, 0.18094444, -0.4241107, -0.09262745, 0.01998503, 0.3636845, -0.36537707, -0.064887315, -0.0055596177, -0.34393403, 0.14510506, 0.06153031, 0.21042548, -0.37673673, 0.00559097, 0.3951263, -0.3184028, 0.13477531, 0.32866392, 0.11474525, 0.36869153, -0.041948557, 0.0048171515, 0.060693625, -0.23275205, 0.0050272522, 0.10768602, 0.5644927, 0.16615626, -0.408257, 0.09772438, 0.2569018, -0.17795925, 0.30351314, -0.08614802, -0.053504214, 0.26501653, -0.03555109, 0.15758383, -0.09940738, -0.24023113, -0.31173047, 0.38147378, -0.21063639, -0.09774987, -0.1817993, -0.11774616, -0.14214633, 0.059035324, -0.3725449, 0.3340544, 0.13414147, -0.19714779, -0.09575635, -0.09342445, -0.14242783, -0.22104931, -0.26635736, 0.4439432, 
-0.15269086, -0.45180112, 0.26066065, 0.040218845, 0.35135338, 0.042087253, 0.11012991, -0.070131294, 0.16262923, 0.093960635, -0.13531551, 0.27414426, 0.06098134, -0.5368306, -0.14256248, -0.237552, 0.084959276, 0.18812361, -0.33930442, 0.038618933, 0.020493202, 0.12378717, 0.025085747, -0.091824085, -0.07121754, 0.39458132, 0.23002088, 0.2822764, 0.09145013, 0.21399665, -0.044916313, -0.33112302, 0.037864167, 0.07553057, -0.20884651, 0.44166437, -0.08572297, -0.39234006, -0.072089285, 0.40756452, 0.09254245, 0.009302463, -0.047768205, 0.22046876, 0.16218755, -0.13332985, 0.18362974, -0.019280719, -0.12338676, -0.114508614, 0.05637966, -0.22382355, 0.056215905, -0.15588409, -0.0018170164, -0.21711601, 0.01625738, -0.19152458, 0.2638185, -0.3450164, 0.08638652, 0.06305374, 0.27371433, -0.35860744, -0.17530364, -0.08452919, 0.16955557, 0.28973067, 0.34323737, 0.01695178, 0.011290298, -0.1810764, -0.22997546, 0.059361335, -0.22219576, 0.1413909, 0.06546778, 0.24282089, -0.32794768, -0.17942922, 0.18658105, -0.097890414, -0.1513686, 0.39975017, 0.25039408, 0.21449175, 0.024416223, 0.27124867, 4.2602097E-4, -0.18044855, -0.13447432, -0.2740434, 0.07203264, -0.07980538, -0.07517155, -0.05881436, -0.12858282, -0.20378308, -0.16090913, 0.15564685, 0.13000391, 0.024031563, -0.057775024, -0.0034629763, -0.2688574, 0.32077903, 0.028848177, 0.047397353, -0.05958363, 0.04970749, -0.13669938, 0.22241849, 0.19625284, 0.08440484, -0.18950449, -0.07067425, -0.2850016, -0.36458367, 0.057933036, 0.13580492, 0.108672015, -0.07561426, -0.2602871, 0.010676954, -0.14393383, 0.17847988, 0.008865389, -0.1725383, -0.09610779, -0.07299303, -0.034239206, 0.074058056, -0.19913039, -0.20220406, -0.09904165, -0.076578766, -0.069398046, 0.33933428, -0.045407534, 0.2897711, -0.14395963, 0.006539133, -0.14036646, 0.1162395, -0.06965859, 0.039443154, 0.276661, -0.45402157, -0.16553378, -0.02244941, -0.19449434, -0.12615664, -0.07213336, -0.032101206, 0.2264348, -0.34752584, 0.24813168, -0.12469421, 
0.19268365, -0.07229131, -0.26878825, -0.1646568, -0.0136323245, 0.24497744, -0.33925042, -0.24806993, -0.27512118, -0.09914559, -0.08087616, -0.25906667, 0.43983266, -0.10659018, -0.0744615, 0.006235671, 0.45885658, 0.20347849, 0.16656812, 0.20918673, -0.033345483, 0.044874102, 0.12723547, -0.47557342, 0.22238532, -0.22489853, -0.123347975, 0.0065780478, 0.110617466, -0.008447691, 0.044564214, -0.13392605, -0.09336997, 0.2162811, -0.3759698, -0.06168682, 0.26348773, 0.14962575, -0.2495721, 0.023211598, 0.13297167, 0.38353235, 0.08947278, -0.2225364, 0.105881885, -0.33606172, -0.042888578, -0.2036912, -0.3129537, 0.1476135, -0.07123925, 0.079535104, -0.09969524, -0.29478237, 0.21786276, -0.059731945, -0.092672855, 0.4276989, -3.9967615E-4, -0.12086898, 0.1315131, 0.01156185, 0.019471781, -0.10278702, 0.2727465, 0.20356695, -0.28574204, 0.16212702, -0.14182404, -0.051593896, -0.09539393]}] |[{doc_similarity_rankings, 0, 79, Fifth document, Florence in Italy, is among the most beautiful cities in Europe., {pieceId -> -1, lshId -> -1320876223, isWordStart -> true, token -> Fifth document, Florence in Italy, is among the most beautiful cities in Europe., lshNeighbors -> [(1293373212,0.17848861258809434),(-1719102856,0.2761524746260818),(1274183715,0.3148226377437515),(-612640902,0.32308714836804664),(-1548374770,0.32312628638943774),(1634839239,0.3992484826857293),(1510101612,0.3994126224276987)], sentence -> 0}, [-0.017528567, -0.20887347, -0.21519172, -0.03888638, 0.13357492, 0.18307811, 0.25547925, -0.08337439, -0.093116276, -0.16162084, 0.23401257, -0.013713108, -0.12097579, 0.09835637, -0.13103157, 0.49466628, 0.2324918, -0.45448515, 0.019689137, -0.04135379, -0.2731323, 0.04852046, 0.45949715, 0.31224954, 0.09636614, 0.07835619, -0.14175831, -0.027474394, 0.19975916, 0.24123047, 0.29196763, 0.067277946, 0.11105793, 0.25373426, -0.24394739, 0.06752376, -0.3060502, 9.322982E-4, 0.23657267, -0.20978928, -0.07832431, 0.17510965, 0.22120374, -0.17409658, -0.10543208, 
0.41644603, 0.2284994, 0.019994497, -0.1206437, -0.10357072, -0.36405995, 0.34403795, 0.2979404, 0.18125014, 0.018748851, 0.04012803, -0.1733186, 0.24439529, -0.08001437, -0.11709171, -0.12347519, -0.21569543, -0.026534403, -0.050762545, 0.008339827, -0.14291511, 0.09049857, -0.16389105, -0.14423226, 0.061702, -0.095125, 0.12417953, 0.16685584, -0.3054142, -0.28169885, 0.050286457, -0.6093201, -0.122661844, 0.2895588, 0.4279639, -0.13902375, 0.21143883, 0.051323365, 0.23632784, -0.01041111, -0.04957247, -0.055871606, -0.106259845, 0.18611698, 0.28124726, -0.2007103, -0.38195023, 0.06384789, 0.01629718, -0.0945481, -0.010838091, -0.04080201, -0.054464836, -0.17945004, -0.21212989, 0.07180114, -0.27967128, -0.1390803, 0.2592707, -0.03620937, -0.19366772, -0.026854295, 0.29872236, 0.07377065, -0.09859825, -0.16164072, 0.43511766, 0.33360565, 0.00644084, 0.021297934, 0.17641163, 0.109603375, -0.2808542, 0.4100023, -0.30830663, -0.02324736, -0.12787724, 0.13755724, 0.14575775, -0.21397276, 0.27359906, 0.1360874, 0.2805497, 0.18917772, 0.082572766, -0.03715889, 0.14764093, -0.116034165, 0.14106748, 0.21679801, 0.12502475, -0.017175207, -0.351808, -0.21835403, 0.26628754, 0.364595, 0.17638803, -0.041836392, 0.18179081, 0.108427785, 0.22434832, 0.15892959, -0.39827198, 0.044703104, 0.3359178, 0.13377586, 0.15026097, -0.08827668, -0.26545244, -0.24913852, -0.08579704, 0.05721378, -0.31592715, -0.14231163, 0.38532966, 0.036052644, -0.0027677694, -0.15842925, -0.21617033, -0.03264196, -0.13026895, 0.05327642, 0.08736663, -0.09196017, -0.41138726, -0.090421245, -0.5568038, -0.117088675, 0.1952759, -0.2986329, 0.24587378, -0.28948125, 0.083854966, 0.3666834, 0.025861917, -0.0017132398, -0.18599303, -0.03876378, 0.12794247, 0.3335259, 0.22762723, -0.39386296, 0.08550472, 0.14752512, 0.2544776, 0.13983288, -0.030865967, -0.13050406, 0.124644294, -0.19920404, 0.18511117, -0.20913021, 0.18329288, -0.26674783, -0.20118403, 0.2890029, -0.40576503, -0.040320467, 0.09285337, 0.2520434, 
0.0034789476, -0.042712186, -0.07351959, 0.08958212, 0.18637295, 0.15107888, -0.3961306, 0.2681504, -0.021723367, -0.024320226, -0.031113315, 0.19820504, 0.26775804, 0.13101543, -0.39684162, -0.1377801, 0.104406044, 0.28336167, -0.22840387, 0.16368443, -0.29106283, -0.38266584, -0.13229474, 0.20744194, 0.22572315, 0.16587397, -0.27363026, 0.1919627, -0.088201866, -0.42006725, -0.37401968, -0.11527952, 0.23918867, 0.19769923, 0.17820881, 0.2648834, 0.011358881, 0.12344342, 0.17440955, 0.1688173, -0.15462974, 0.18139134, -0.37719452, -0.07280123, -0.26092157, -0.19534999, -0.23901841, 0.3925651, -0.2268674, 0.2323894, 0.38537943, -0.32429454, -0.094378285, 0.16097061, 0.084330045, 0.07674305, -0.100616775, 0.20368801, 0.18317987, -0.10215017, 0.25102583, 0.013015796, 0.2618376, 0.19005089, 0.10934586, 0.121803135, 0.14295664, -0.13193665, 0.0531183, 0.017180707, -0.027863244, -0.26400146, -0.1470274, 0.21853186, -0.02715699, 0.027724234, -0.1564268, -0.102942735, 0.016841251, 0.40123186, -0.37726215, 0.26930076, 0.08578669, 0.1325882, -0.24705328, -0.21208054, 0.068617605, 0.18242663, -0.39056832, -0.01681968, 0.1632192, 0.10309466, 0.21451901, 0.25458825, 0.0111891525, -0.11184347, 0.4824411, -0.15262514, -0.12453984, 0.26980564, -0.24968751, -0.2749819, 0.27711532, -0.024115924, 0.30004704, 0.11883662, 0.026279865, 0.08240666, -0.6025861, 0.07997487, -0.46908012, 0.00318713, 0.017263228, -0.08420625, -0.20542656, 0.15257843, 0.28043687, -0.2576107, -0.029525554, 0.22024228, 0.08719829, -0.10169949, 0.47289637, -0.0070210746, 0.21146531, -0.09518342, 0.25460428, -0.22287658, 0.27640212, -0.2639986, -0.09115377, 0.012192329, 0.11418934, 0.042151, -0.062401574, -0.3224692, 0.21616587, -0.009924554, -0.03401868, -0.06122483, 0.09629407, 0.0010092515, 0.04388869, 0.06199242, 0.31550166, 0.21101683, -0.029377753, -0.36951146, -0.0112636825, -0.102699466, 0.035519373, 0.015170618, -0.014788982, 0.4407807, -0.09595226, 0.018110882, -0.13322428, 0.25438005, 0.20591274, 
0.13240317, 0.1370632, 0.07530696, 0.15535797, -0.054800827, -0.00992456, -0.15799199, -0.22623523, -0.27510443, 0.21811266, -0.22423144, -0.1767162, 0.18349846, 0.22506163, -0.13641146, 0.12888514, 0.32516634, 0.116478354, -0.14479439, 0.2633992, -0.101380765, 0.08838486, 0.31893408, -0.022981562, 0.15893175, 0.51558924, 0.21190989, -0.36944315, -0.048417248, -0.22201133, -0.013516225, 0.26638502, -0.15765886, 0.1851696, 0.38429335, 0.30839187, 0.44408417, 0.016373433, -0.111148976, 0.09977997, 0.21016383, 0.018669449, -0.16498841, -0.15592425, 0.26489583, 0.06269013, -0.15986864, -0.03969585, -0.13937603, 0.031850375, -0.13655634, -0.40000895, 0.05436843, 0.21910602, -0.4996334, 0.086884744, -0.29076114, 0.05204509, -0.22826457, 0.20508406, -0.2507804, -0.10823102, 0.38743898, -0.100984976, 0.058472827, -0.16382112, -0.16156591, 0.023903372, 0.033624973, -0.023788728, -0.018010495, 0.3690815, -0.09954051, 0.052623548, 0.004742802, 0.19814914, -0.06711465, 0.17929058, 0.03160398, -0.1334006, -0.36719155, 0.13863695, -0.22021416, -0.4222436, -0.35124636, 0.36573943, -0.14639676, -0.26974335, -0.21589987, -0.2655588, 0.06684742, 0.17689691, 0.44980702, -0.4014982, -0.09032181, 0.48900092, -0.07084343, -0.1657361, 0.29341906, 0.21496047, -0.3124336, 0.3256341, 0.26760855, -0.04306482, 0.025937881, 0.5136517, 0.13405447, 0.18719737, -0.22120632, 0.4532139, -0.22904672, 0.32023016, -0.17113322, -0.214062, -0.19590136, -0.004523295, 0.3206988, 0.18094444, -0.4241107, -0.09262745, 0.01998503, 0.3636845, -0.36537707, -0.064887315, -0.0055596177, -0.34393403, 0.14510506, 0.06153031, 0.21042548, -0.37673673, 0.00559097, 0.3951263, -0.3184028, 0.13477531, 0.32866392, 0.11474525, 0.36869153, -0.041948557, 0.0048171515, 0.060693625, -0.23275205, 0.0050272522, 0.10768602, 0.5644927, 0.16615626, -0.408257, 0.09772438, 0.2569018, -0.17795925, 0.30351314, -0.08614802, -0.053504214, 0.26501653, -0.03555109, 0.15758383, -0.09940738, -0.24023113, -0.31173047, 0.38147378, -0.21063639, 
-0.09774987, -0.1817993, -0.11774616, -0.14214633, 0.059035324, -0.3725449, 0.3340544, 0.13414147, -0.19714779, -0.09575635, -0.09342445, -0.14242783, -0.22104931, -0.26635736, 0.4439432, -0.15269086, -0.45180112, 0.26066065, 0.040218845, 0.35135338, 0.042087253, 0.11012991, -0.070131294, 0.16262923, 0.093960635, -0.13531551, 0.27414426, 0.06098134, -0.5368306, -0.14256248, -0.237552, 0.084959276, 0.18812361, -0.33930442, 0.038618933, 0.020493202, 0.12378717, 0.025085747, -0.091824085, -0.07121754, 0.39458132, 0.23002088, 0.2822764, 0.09145013, 0.21399665, -0.044916313, -0.33112302, 0.037864167, 0.07553057, -0.20884651, 0.44166437, -0.08572297, -0.39234006, -0.072089285, 0.40756452, 0.09254245, 0.009302463, -0.047768205, 0.22046876, 0.16218755, -0.13332985, 0.18362974, -0.019280719, -0.12338676, -0.114508614, 0.05637966, -0.22382355, 0.056215905, -0.15588409, -0.0018170164, -0.21711601, 0.01625738, -0.19152458, 0.2638185, -0.3450164, 0.08638652, 0.06305374, 0.27371433, -0.35860744, -0.17530364, -0.08452919, 0.16955557, 0.28973067, 0.34323737, 0.01695178, 0.011290298, -0.1810764, -0.22997546, 0.059361335, -0.22219576, 0.1413909, 0.06546778, 0.24282089, -0.32794768, -0.17942922, 0.18658105, -0.097890414, -0.1513686, 0.39975017, 0.25039408, 0.21449175, 0.024416223, 0.27124867, 4.2602097E-4, -0.18044855, -0.13447432, -0.2740434, 0.07203264, -0.07980538, -0.07517155, -0.05881436, -0.12858282, -0.20378308, -0.16090913, 0.15564685, 0.13000391, 0.024031563, -0.057775024, -0.0034629763, -0.2688574, 0.32077903, 0.028848177, 0.047397353, -0.05958363, 0.04970749, -0.13669938, 0.22241849, 0.19625284, 0.08440484, -0.18950449, -0.07067425, -0.2850016, -0.36458367, 0.057933036, 0.13580492, 0.108672015, -0.07561426, -0.2602871, 0.010676954, -0.14393383, 0.17847988, 0.008865389, -0.1725383, -0.09610779, -0.07299303, -0.034239206, 0.074058056, -0.19913039, -0.20220406, -0.09904165, -0.076578766, -0.069398046, 0.33933428, -0.045407534, 0.2897711, -0.14395963, 0.006539133, -0.14036646, 
0.1162395, -0.06965859, 0.039443154, 0.276661, -0.45402157, -0.16553378, -0.02244941, -0.19449434, -0.12615664, -0.07213336, -0.032101206, 0.2264348, -0.34752584, 0.24813168, -0.12469421, 0.19268365, -0.07229131, -0.26878825, -0.1646568, -0.0136323245, 0.24497744, -0.33925042, -0.24806993, -0.27512118, -0.09914559, -0.08087616, -0.25906667, 0.43983266, -0.10659018, -0.0744615, 0.006235671, 0.45885658, 0.20347849, 0.16656812, 0.20918673, -0.033345483, 0.044874102, 0.12723547, -0.47557342, 0.22238532, -0.22489853, -0.123347975, 0.0065780478, 0.110617466, -0.008447691, 0.044564214, -0.13392605, -0.09336997, 0.2162811, -0.3759698, -0.06168682, 0.26348773, 0.14962575, -0.2495721, 0.023211598, 0.13297167, 0.38353235, 0.08947278, -0.2225364, 0.105881885, -0.33606172, -0.042888578, -0.2036912, -0.3129537, 0.1476135, -0.07123925, 0.079535104, -0.09969524, -0.29478237, 0.21786276, -0.059731945, -0.092672855, 0.4276989, -3.9967615E-4, -0.12086898, 0.1315131, 0.01156185, 0.019471781, -0.10278702, 0.2727465, 0.20356695, -0.28574204, 0.16212702, -0.14182404, -0.051593896, -0.09539393]}] |-1320876223 |[(1293373212,0.17848861258809434),(-1719102856,0.2761524746260818),(1274183715,0.3148226377437515),(-612640902,0.32308714836804664),(-1548374770,0.32312628638943774),(1634839239,0.3992484826857293),(1510101612,0.3994126224276987)] |1293373212 |0.17848861258809434 |\n", - "|Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France. 
|[{document, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {sentence -> 0}, []}] |[{document, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {sentence -> 0}, []}] |[{token, 0, 4, Sixth, {sentence -> 0}, []}, {token, 6, 13, document, {sentence -> 0}, []}, {token, 14, 14, ,, {sentence -> 0}, []}, {token, 16, 23, Florence, {sentence -> 0}, []}, {token, 25, 26, in, {sentence -> 0}, []}, {token, 28, 32, Italy, {sentence -> 0}, []}, {token, 33, 33, ,, {sentence -> 0}, []}, {token, 35, 36, is, {sentence -> 0}, []}, {token, 38, 38, a, {sentence -> 0}, []}, {token, 40, 43, very, {sentence -> 0}, []}, {token, 45, 53, beautiful, {sentence -> 0}, []}, {token, 55, 58, city, {sentence -> 0}, []}, {token, 60, 61, in, {sentence -> 0}, []}, {token, 63, 68, Europe, {sentence -> 0}, []}, {token, 70, 73, like, {sentence -> 0}, []}, {token, 75, 78, Lyon, {sentence -> 0}, []}, {token, 80, 81, in, {sentence -> 0}, []}, {token, 83, 88, France, {sentence -> 0}, []}, {token, 89, 89, ., {sentence -> 0}, []}]|[{sentence_embeddings, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {sentence -> 0, token -> Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., pieceId -> -1, isWordStart -> true}, [-0.023446266, -0.20165044, -0.21878108, -0.041623246, 0.11712541, 0.17356895, 0.25060305, -0.07937133, -0.10039697, -0.15663107, 0.23232257, -0.01931893, -0.12145367, 0.10105985, -0.13435838, 0.49187884, 0.23358476, -0.45301625, 0.026803127, -0.04935983, -0.26145518, 0.05165556, 0.45950112, 0.306874, 0.0998373, 0.078277044, -0.14320722, -0.01781481, 0.19437787, 0.25057974, 0.2904214, 0.05932349, 0.102811575, 0.2567444, -0.24400775, 0.076585956, -0.3172145, 0.004280416, 0.23430033, -0.19544546, -0.08320522, 0.1736987, 0.20459501, -0.16766919, -0.11249152, 0.4083342, 0.23185952, 0.022361012, -0.12112098, 
-0.10556416, -0.36327595, 0.3500205, 0.3062079, 0.19041865, 0.0067362506, 0.039656542, -0.17330945, 0.25340703, -0.08069101, -0.119429514, -0.13032362, -0.20993283, -0.0167645, -0.050086897, 0.0056060716, -0.15245143, 0.08916381, -0.1656918, -0.14617592, 0.07238135, -0.09877798, 0.1185568, 0.16529602, -0.2908418, -0.2838721, 0.049403507, -0.6004667, -0.12694398, 0.300743, 0.42590234, -0.1330224, 0.21685761, 0.042737015, 0.23039605, -0.005907735, -0.046434082, -0.051799156, -0.10921319, 0.19471209, 0.280458, -0.21468745, -0.389082, 0.06603544, 0.0058575007, -0.092998825, -0.009324989, -0.038295563, -0.05944777, -0.17968194, -0.2114523, 0.07265465, -0.2734195, -0.1358477, 0.25927955, -0.031233825, -0.19235252, -0.029086787, 0.2901101, 0.06723385, -0.102146484, -0.16508116, 0.43281472, 0.32636094, 0.009097091, 0.01839175, 0.18650065, 0.10878458, -0.28116676, 0.40945655, -0.30725458, -0.022352053, -0.11574949, 0.14054005, 0.14310767, -0.21440077, 0.26043293, 0.13321282, 0.27419066, 0.17493708, 0.085719526, -0.04549965, 0.14518753, -0.11650214, 0.1432643, 0.2130986, 0.12127107, -0.013671406, -0.34709513, -0.22083285, 0.25214735, 0.35938737, 0.17598711, -0.04147198, 0.18509206, 0.10416745, 0.22944078, 0.16071385, -0.3968478, 0.04381103, 0.34408224, 0.1280205, 0.14642608, -0.08876359, -0.27770814, -0.25106782, -0.082514875, 0.05523707, -0.31298104, -0.1368086, 0.37814152, 0.032189853, -0.008601095, -0.15483275, -0.20344211, -0.024224738, -0.12747693, 0.049716156, 0.0946889, -0.08236277, -0.40535507, -0.09445458, -0.55537444, -0.117638126, 0.20131218, -0.29938865, 0.23693521, -0.29540664, 0.08935149, 0.37324956, 0.01921228, -0.0033368187, -0.20008223, -0.026781926, 0.11076178, 0.33815718, 0.2366743, -0.3875283, 0.07911281, 0.14018981, 0.2484453, 0.14206566, -0.03912406, -0.124510124, 0.12799053, -0.20131487, 0.18244149, -0.21000002, 0.18165322, -0.2643093, -0.20578533, 0.2962612, -0.4071083, -0.0464723, 0.094553664, 0.25337783, 0.009862542, -0.050271366, -0.07392379, 
0.08882002, 0.17944442, 0.16397862, -0.39179775, 0.26736665, -0.0192729, -0.018442439, -0.047249265, 0.18863918, 0.27062738, 0.12270789, -0.38781646, -0.14433745, 0.10137269, 0.27908146, -0.22974536, 0.15082045, -0.28768408, -0.38333547, -0.14000101, 0.21542954, 0.22084652, 0.16104579, -0.27537724, 0.18188491, -0.085308656, -0.42232037, -0.3777818, -0.11865082, 0.24589635, 0.19026503, 0.16947359, 0.26346776, 0.01202057, 0.121433474, 0.17466974, 0.17356977, -0.14497308, 0.18488164, -0.37579408, -0.06502906, -0.26128498, -0.1877009, -0.23865594, 0.38699523, -0.23804992, 0.23802643, 0.37530044, -0.31416655, -0.090628475, 0.15442699, 0.083323605, 0.08340364, -0.10496614, 0.19882442, 0.17857088, -0.09811273, 0.2436273, 0.019029541, 0.26368392, 0.18991555, 0.10516484, 0.123206705, 0.1514704, -0.13549489, 0.059166815, 0.020459305, -0.018234596, -0.26658913, -0.15862788, 0.22305442, -0.029723663, 0.038318057, -0.16501893, -0.09223226, 0.0057948544, 0.39988422, -0.38048235, 0.25966093, 0.09570852, 0.14424734, -0.23434037, -0.21592666, 0.056217305, 0.17857862, -0.39444405, -0.010454616, 0.1634386, 0.10004582, 0.20535669, 0.2486722, -0.0013606278, -0.10910795, 0.47047195, -0.14693314, -0.13118234, 0.2695527, -0.2521114, -0.26835597, 0.27185902, -0.03241541, 0.29947615, 0.12267158, 0.03249368, 0.08601499, -0.5978525, 0.07179763, -0.46106672, -0.0033933884, 0.010675606, -0.090698555, -0.20369855, 0.15400767, 0.2822103, -0.26237437, -0.023959426, 0.22057839, 0.088779986, -0.10327962, 0.48200354, 0.009818012, 0.21735828, -0.09726218, 0.2565691, -0.2176083, 0.2767009, -0.26876214, -0.1043232, 0.019080801, 0.109899275, 0.046640366, -0.05314609, -0.3256458, 0.22454666, -0.0037985877, -0.03338025, -0.05607982, 0.10121204, 0.014488875, 0.044235453, 0.05585344, 0.3089643, 0.19958188, -0.034279544, -0.37689233, -0.02067298, -0.108184114, 0.04761538, 0.014341503, -0.011753305, 0.4448616, -0.09475451, 0.011186096, -0.1306354, 0.24714595, 0.21111813, 0.12608668, 0.13853198, 0.07227103, 
0.14366136, -0.04700943, -0.0034062795, -0.16412996, -0.21816893, -0.27851155, 0.21785723, -0.2318628, -0.17842, 0.17996907, 0.23085351, -0.13216704, 0.135295, 0.32747313, 0.105372675, -0.14992404, 0.2551415, -0.09932614, 0.08560063, 0.32468835, -0.027163632, 0.1535825, 0.5111959, 0.20384456, -0.37404054, -0.041242123, -0.22566558, -0.0072444435, 0.26074654, -0.1534884, 0.18430914, 0.37578115, 0.29331762, 0.43657038, 0.018274307, -0.11042639, 0.09899991, 0.20794784, 0.017362932, -0.16367818, -0.16474143, 0.25822097, 0.061900627, -0.15261118, -0.04392805, -0.13885197, 0.028419657, -0.12466939, -0.38992488, 0.04835484, 0.20757526, -0.4971978, 0.09418101, -0.2879418, 0.055356182, -0.2286889, 0.2083682, -0.25274304, -0.10421833, 0.3857167, -0.09897424, 0.053298876, -0.168115, -0.16063228, 0.014610065, 0.028016137, -0.01940221, -0.018098384, 0.3685852, -0.099855445, 0.054043293, 0.0076535707, 0.1977779, -0.060267262, 0.17461002, 0.025309464, -0.14138436, -0.36965942, 0.14341168, -0.22054991, -0.41669595, -0.350635, 0.35354385, -0.14340346, -0.26205418, -0.21479966, -0.25892776, 0.06292348, 0.17838463, 0.44896346, -0.4010159, -0.082289286, 0.4955979, -0.06996464, -0.16989557, 0.2896182, 0.20098865, -0.31606963, 0.31730008, 0.27175304, -0.039616365, 0.03201407, 0.5099212, 0.13284244, 0.19106193, -0.2276362, 0.44946808, -0.24190746, 0.31880343, -0.1655242, -0.219303, -0.2011259, 0.0012589244, 0.31311673, 0.18145496, -0.41264796, -0.09883746, 0.02824915, 0.35613367, -0.35888487, -0.06726893, -0.0065832324, -0.34199518, 0.13907939, 0.061010145, 0.20799887, -0.3716825, -0.004743823, 0.39617428, -0.31897175, 0.12047265, 0.32285565, 0.10910455, 0.36702907, -0.027534468, -0.003306972, 0.060804185, -0.23777582, -0.0062758606, 0.11205024, 0.55437744, 0.17333084, -0.40158457, 0.08618948, 0.2623983, -0.16576529, 0.30049047, -0.085133836, -0.06609498, 0.27484143, -0.029358897, 0.15490839, -0.09690879, -0.23637532, -0.3109882, 0.38827002, -0.21937749, -0.10091087, -0.18243739, 
-0.1090896, -0.13279268, 0.04819067, -0.37471464, 0.33432263, 0.13068555, -0.18624684, -0.092815556, -0.09915114, -0.1402976, -0.21539931, -0.26148954, 0.43860984, -0.14875218, -0.45604524, 0.26144618, 0.046405524, 0.3500595, 0.036529973, 0.10993026, -0.06356534, 0.15648735, 0.10038366, -0.12816685, 0.26705813, 0.058001406, -0.5319837, -0.13663763, -0.23581989, 0.08892639, 0.1951976, -0.332798, 0.041280366, 0.014529156, 0.1333642, 0.025140464, -0.08970695, -0.07874118, 0.3977039, 0.23984994, 0.28039792, 0.08364948, 0.22422723, -0.038728643, -0.33996713, 0.034016807, 0.0771055, -0.20905045, 0.43872592, -0.08042428, -0.38293678, -0.06786471, 0.4005434, 0.099631764, 0.0023761084, -0.034679968, 0.22867568, 0.15132599, -0.13165158, 0.17360209, -0.023124326, -0.11094245, -0.123592794, 0.060751006, -0.2161707, 0.06489437, -0.16699107, -0.0067672064, -0.21374385, 0.006120859, -0.18749413, 0.25970832, -0.3388818, 0.09158317, 0.06126679, 0.28619418, -0.35591418, -0.1678413, -0.08147895, 0.17794101, 0.27810583, 0.3330661, 0.018313073, 0.018819055, -0.1754481, -0.22454868, 0.05389455, -0.22023496, 0.15060861, 0.07419974, 0.24520048, -0.32962036, -0.17017749, 0.18325652, -0.08431439, -0.14495264, 0.40784538, 0.25344115, 0.22945794, 0.026034003, 0.25645342, 0.009030903, -0.17643104, -0.12609684, -0.26928347, 0.07151792, -0.077939935, -0.07463445, -0.06530657, -0.12808533, -0.20940332, -0.14916818, 0.15249917, 0.13223007, 0.029716063, -0.060736544, -0.008027437, -0.27261928, 0.32002795, 0.022525271, 0.05300195, -0.049036894, 0.039181605, -0.14347084, 0.22075538, 0.2040532, 0.07992179, -0.18918262, -0.062667236, -0.28125694, -0.36992085, 0.06321907, 0.14176568, 0.107254215, -0.076132506, -0.26557773, 0.0027493138, -0.14170834, 0.18940176, 0.02132048, -0.16085538, -0.09090096, -0.069289364, -0.027277801, 0.076331265, -0.19630285, -0.1918632, -0.10226693, -0.08679162, -0.071425855, 0.338751, -0.055547565, 0.28674108, -0.15268509, 6.1385415E-4, -0.15316467, 0.10762864, -0.06699272, 
0.044879604, 0.277528, -0.4518037, -0.16449177, -0.025337236, -0.20989054, -0.121194035, -0.06343536, -0.0277429, 0.22775164, -0.3471569, 0.2355159, -0.12448876, 0.19339418, -0.07414347, -0.26808125, -0.1616031, 1.1864491E-4, 0.2448654, -0.32890862, -0.2461858, -0.26607546, -0.10263033, -0.08375746, -0.25876787, 0.43655652, -0.11765381, -0.07028996, 0.0068271267, 0.45646456, 0.19998655, 0.162282, 0.21107695, -0.033425715, 0.03880097, 0.13234338, -0.46868268, 0.21881902, -0.22336914, -0.116309024, 0.005304634, 0.09728798, -0.015745807, 0.040650856, -0.13960604, -0.08813464, 0.22259608, -0.38552696, -0.060650844, 0.25103465, 0.14920752, -0.25241658, 0.0152171375, 0.12354388, 0.386575, 0.08479981, -0.21507308, 0.09969368, -0.32417792, -0.03249409, -0.18413575, -0.31314823, 0.13829513, -0.06986319, 0.087390184, -0.09777708, -0.30072027, 0.21826608, -0.05674966, -0.09027065, 0.42604133, -8.4666326E-4, -0.11587073, 0.14037885, 0.016692076, 0.020549806, -0.096504934, 0.26699734, 0.2102296, -0.28719124, 0.16563456, -0.13212183, -0.047723904, -0.09555403]}] |[{doc_similarity_rankings, 0, 89, Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., {pieceId -> -1, lshId -> 1293373212, isWordStart -> true, token -> Sixth document, Florence in Italy, is a very beautiful city in Europe like Lyon in France., lshNeighbors -> [(-1320876223,0.17848861258809434),(-1719102856,0.2876650539432857),(-1548374770,0.3225589844685982),(1274183715,0.34962777859773414),(-612640902,0.3656377678477694),(1510101612,0.3979194244143298),(1634839239,0.39846872824443047)], sentence -> 0}, [-0.023446266, -0.20165044, -0.21878108, -0.041623246, 0.11712541, 0.17356895, 0.25060305, -0.07937133, -0.10039697, -0.15663107, 0.23232257, -0.01931893, -0.12145367, 0.10105985, -0.13435838, 0.49187884, 0.23358476, -0.45301625, 0.026803127, -0.04935983, -0.26145518, 0.05165556, 0.45950112, 0.306874, 0.0998373, 0.078277044, -0.14320722, -0.01781481, 0.19437787, 0.25057974, 
0.2904214, 0.05932349, 0.102811575, 0.2567444, -0.24400775, 0.076585956, -0.3172145, 0.004280416, 0.23430033, -0.19544546, -0.08320522, 0.1736987, 0.20459501, -0.16766919, -0.11249152, 0.4083342, 0.23185952, 0.022361012, -0.12112098, -0.10556416, -0.36327595, 0.3500205, 0.3062079, 0.19041865, 0.0067362506, 0.039656542, -0.17330945, 0.25340703, -0.08069101, -0.119429514, -0.13032362, -0.20993283, -0.0167645, -0.050086897, 0.0056060716, -0.15245143, 0.08916381, -0.1656918, -0.14617592, 0.07238135, -0.09877798, 0.1185568, 0.16529602, -0.2908418, -0.2838721, 0.049403507, -0.6004667, -0.12694398, 0.300743, 0.42590234, -0.1330224, 0.21685761, 0.042737015, 0.23039605, -0.005907735, -0.046434082, -0.051799156, -0.10921319, 0.19471209, 0.280458, -0.21468745, -0.389082, 0.06603544, 0.0058575007, -0.092998825, -0.009324989, -0.038295563, -0.05944777, -0.17968194, -0.2114523, 0.07265465, -0.2734195, -0.1358477, 0.25927955, -0.031233825, -0.19235252, -0.029086787, 0.2901101, 0.06723385, -0.102146484, -0.16508116, 0.43281472, 0.32636094, 0.009097091, 0.01839175, 0.18650065, 0.10878458, -0.28116676, 0.40945655, -0.30725458, -0.022352053, -0.11574949, 0.14054005, 0.14310767, -0.21440077, 0.26043293, 0.13321282, 0.27419066, 0.17493708, 0.085719526, -0.04549965, 0.14518753, -0.11650214, 0.1432643, 0.2130986, 0.12127107, -0.013671406, -0.34709513, -0.22083285, 0.25214735, 0.35938737, 0.17598711, -0.04147198, 0.18509206, 0.10416745, 0.22944078, 0.16071385, -0.3968478, 0.04381103, 0.34408224, 0.1280205, 0.14642608, -0.08876359, -0.27770814, -0.25106782, -0.082514875, 0.05523707, -0.31298104, -0.1368086, 0.37814152, 0.032189853, -0.008601095, -0.15483275, -0.20344211, -0.024224738, -0.12747693, 0.049716156, 0.0946889, -0.08236277, -0.40535507, -0.09445458, -0.55537444, -0.117638126, 0.20131218, -0.29938865, 0.23693521, -0.29540664, 0.08935149, 0.37324956, 0.01921228, -0.0033368187, -0.20008223, -0.026781926, 0.11076178, 0.33815718, 0.2366743, -0.3875283, 0.07911281, 0.14018981, 
0.2484453, 0.14206566, -0.03912406, -0.124510124, 0.12799053, -0.20131487, 0.18244149, -0.21000002, 0.18165322, -0.2643093, -0.20578533, 0.2962612, -0.4071083, -0.0464723, 0.094553664, 0.25337783, 0.009862542, -0.050271366, -0.07392379, 0.08882002, 0.17944442, 0.16397862, -0.39179775, 0.26736665, -0.0192729, -0.018442439, -0.047249265, 0.18863918, 0.27062738, 0.12270789, -0.38781646, -0.14433745, 0.10137269, 0.27908146, -0.22974536, 0.15082045, -0.28768408, -0.38333547, -0.14000101, 0.21542954, 0.22084652, 0.16104579, -0.27537724, 0.18188491, -0.085308656, -0.42232037, -0.3777818, -0.11865082, 0.24589635, 0.19026503, 0.16947359, 0.26346776, 0.01202057, 0.121433474, 0.17466974, 0.17356977, -0.14497308, 0.18488164, -0.37579408, -0.06502906, -0.26128498, -0.1877009, -0.23865594, 0.38699523, -0.23804992, 0.23802643, 0.37530044, -0.31416655, -0.090628475, 0.15442699, 0.083323605, 0.08340364, -0.10496614, 0.19882442, 0.17857088, -0.09811273, 0.2436273, 0.019029541, 0.26368392, 0.18991555, 0.10516484, 0.123206705, 0.1514704, -0.13549489, 0.059166815, 0.020459305, -0.018234596, -0.26658913, -0.15862788, 0.22305442, -0.029723663, 0.038318057, -0.16501893, -0.09223226, 0.0057948544, 0.39988422, -0.38048235, 0.25966093, 0.09570852, 0.14424734, -0.23434037, -0.21592666, 0.056217305, 0.17857862, -0.39444405, -0.010454616, 0.1634386, 0.10004582, 0.20535669, 0.2486722, -0.0013606278, -0.10910795, 0.47047195, -0.14693314, -0.13118234, 0.2695527, -0.2521114, -0.26835597, 0.27185902, -0.03241541, 0.29947615, 0.12267158, 0.03249368, 0.08601499, -0.5978525, 0.07179763, -0.46106672, -0.0033933884, 0.010675606, -0.090698555, -0.20369855, 0.15400767, 0.2822103, -0.26237437, -0.023959426, 0.22057839, 0.088779986, -0.10327962, 0.48200354, 0.009818012, 0.21735828, -0.09726218, 0.2565691, -0.2176083, 0.2767009, -0.26876214, -0.1043232, 0.019080801, 0.109899275, 0.046640366, -0.05314609, -0.3256458, 0.22454666, -0.0037985877, -0.03338025, -0.05607982, 0.10121204, 0.014488875, 0.044235453, 
0.05585344, 0.3089643, 0.19958188, -0.034279544, -0.37689233, -0.02067298, -0.108184114, 0.04761538, 0.014341503, -0.011753305, 0.4448616, -0.09475451, 0.011186096, -0.1306354, 0.24714595, 0.21111813, 0.12608668, 0.13853198, 0.07227103, 0.14366136, -0.04700943, -0.0034062795, -0.16412996, -0.21816893, -0.27851155, 0.21785723, -0.2318628, -0.17842, 0.17996907, 0.23085351, -0.13216704, 0.135295, 0.32747313, 0.105372675, -0.14992404, 0.2551415, -0.09932614, 0.08560063, 0.32468835, -0.027163632, 0.1535825, 0.5111959, 0.20384456, -0.37404054, -0.041242123, -0.22566558, -0.0072444435, 0.26074654, -0.1534884, 0.18430914, 0.37578115, 0.29331762, 0.43657038, 0.018274307, -0.11042639, 0.09899991, 0.20794784, 0.017362932, -0.16367818, -0.16474143, 0.25822097, 0.061900627, -0.15261118, -0.04392805, -0.13885197, 0.028419657, -0.12466939, -0.38992488, 0.04835484, 0.20757526, -0.4971978, 0.09418101, -0.2879418, 0.055356182, -0.2286889, 0.2083682, -0.25274304, -0.10421833, 0.3857167, -0.09897424, 0.053298876, -0.168115, -0.16063228, 0.014610065, 0.028016137, -0.01940221, -0.018098384, 0.3685852, -0.099855445, 0.054043293, 0.0076535707, 0.1977779, -0.060267262, 0.17461002, 0.025309464, -0.14138436, -0.36965942, 0.14341168, -0.22054991, -0.41669595, -0.350635, 0.35354385, -0.14340346, -0.26205418, -0.21479966, -0.25892776, 0.06292348, 0.17838463, 0.44896346, -0.4010159, -0.082289286, 0.4955979, -0.06996464, -0.16989557, 0.2896182, 0.20098865, -0.31606963, 0.31730008, 0.27175304, -0.039616365, 0.03201407, 0.5099212, 0.13284244, 0.19106193, -0.2276362, 0.44946808, -0.24190746, 0.31880343, -0.1655242, -0.219303, -0.2011259, 0.0012589244, 0.31311673, 0.18145496, -0.41264796, -0.09883746, 0.02824915, 0.35613367, -0.35888487, -0.06726893, -0.0065832324, -0.34199518, 0.13907939, 0.061010145, 0.20799887, -0.3716825, -0.004743823, 0.39617428, -0.31897175, 0.12047265, 0.32285565, 0.10910455, 0.36702907, -0.027534468, -0.003306972, 0.060804185, -0.23777582, -0.0062758606, 0.11205024, 
0.55437744, 0.17333084, -0.40158457, 0.08618948, 0.2623983, -0.16576529, 0.30049047, -0.085133836, -0.06609498, 0.27484143, -0.029358897, 0.15490839, -0.09690879, -0.23637532, -0.3109882, 0.38827002, -0.21937749, -0.10091087, -0.18243739, -0.1090896, -0.13279268, 0.04819067, -0.37471464, 0.33432263, 0.13068555, -0.18624684, -0.092815556, -0.09915114, -0.1402976, -0.21539931, -0.26148954, 0.43860984, -0.14875218, -0.45604524, 0.26144618, 0.046405524, 0.3500595, 0.036529973, 0.10993026, -0.06356534, 0.15648735, 0.10038366, -0.12816685, 0.26705813, 0.058001406, -0.5319837, -0.13663763, -0.23581989, 0.08892639, 0.1951976, -0.332798, 0.041280366, 0.014529156, 0.1333642, 0.025140464, -0.08970695, -0.07874118, 0.3977039, 0.23984994, 0.28039792, 0.08364948, 0.22422723, -0.038728643, -0.33996713, 0.034016807, 0.0771055, -0.20905045, 0.43872592, -0.08042428, -0.38293678, -0.06786471, 0.4005434, 0.099631764, 0.0023761084, -0.034679968, 0.22867568, 0.15132599, -0.13165158, 0.17360209, -0.023124326, -0.11094245, -0.123592794, 0.060751006, -0.2161707, 0.06489437, -0.16699107, -0.0067672064, -0.21374385, 0.006120859, -0.18749413, 0.25970832, -0.3388818, 0.09158317, 0.06126679, 0.28619418, -0.35591418, -0.1678413, -0.08147895, 0.17794101, 0.27810583, 0.3330661, 0.018313073, 0.018819055, -0.1754481, -0.22454868, 0.05389455, -0.22023496, 0.15060861, 0.07419974, 0.24520048, -0.32962036, -0.17017749, 0.18325652, -0.08431439, -0.14495264, 0.40784538, 0.25344115, 0.22945794, 0.026034003, 0.25645342, 0.009030903, -0.17643104, -0.12609684, -0.26928347, 0.07151792, -0.077939935, -0.07463445, -0.06530657, -0.12808533, -0.20940332, -0.14916818, 0.15249917, 0.13223007, 0.029716063, -0.060736544, -0.008027437, -0.27261928, 0.32002795, 0.022525271, 0.05300195, -0.049036894, 0.039181605, -0.14347084, 0.22075538, 0.2040532, 0.07992179, -0.18918262, -0.062667236, -0.28125694, -0.36992085, 0.06321907, 0.14176568, 0.107254215, -0.076132506, -0.26557773, 0.0027493138, -0.14170834, 0.18940176, 
0.02132048, -0.16085538, -0.09090096, -0.069289364, -0.027277801, 0.076331265, -0.19630285, -0.1918632, -0.10226693, -0.08679162, -0.071425855, 0.338751, -0.055547565, 0.28674108, -0.15268509, 6.1385415E-4, -0.15316467, 0.10762864, -0.06699272, 0.044879604, 0.277528, -0.4518037, -0.16449177, -0.025337236, -0.20989054, -0.121194035, -0.06343536, -0.0277429, 0.22775164, -0.3471569, 0.2355159, -0.12448876, 0.19339418, -0.07414347, -0.26808125, -0.1616031, 1.1864491E-4, 0.2448654, -0.32890862, -0.2461858, -0.26607546, -0.10263033, -0.08375746, -0.25876787, 0.43655652, -0.11765381, -0.07028996, 0.0068271267, 0.45646456, 0.19998655, 0.162282, 0.21107695, -0.033425715, 0.03880097, 0.13234338, -0.46868268, 0.21881902, -0.22336914, -0.116309024, 0.005304634, 0.09728798, -0.015745807, 0.040650856, -0.13960604, -0.08813464, 0.22259608, -0.38552696, -0.060650844, 0.25103465, 0.14920752, -0.25241658, 0.0152171375, 0.12354388, 0.386575, 0.08479981, -0.21507308, 0.09969368, -0.32417792, -0.03249409, -0.18413575, -0.31314823, 0.13829513, -0.06986319, 0.087390184, -0.09777708, -0.30072027, 0.21826608, -0.05674966, -0.09027065, 0.42604133, -8.4666326E-4, -0.11587073, 0.14037885, 0.016692076, 0.020549806, -0.096504934, 0.26699734, 0.2102296, -0.28719124, 0.16563456, -0.13212183, -0.047723904, -0.09555403]}] |1293373212 |[(-1320876223,0.17848861258809434),(-1719102856,0.2876650539432857),(-1548374770,0.3225589844685982),(1274183715,0.34962777859773414),(-612640902,0.3656377678477694),(1510101612,0.3979194244143298),(1634839239,0.39846872824443047)] |-1320876223 |0.17848861258809434 |\n", - "|Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France.|[{document, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {sentence -> 0}, []}]|[{document, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {sentence -> 0}, []}] 
|[{token, 0, 6, Seventh, {sentence -> 0}, []}, {token, 8, 15, document, {sentence -> 0}, []}, {token, 16, 16, ,, {sentence -> 0}, []}, {token, 18, 20, the, {sentence -> 0}, []}, {token, 22, 27, French, {sentence -> 0}, []}, {token, 29, 35, Riviera, {sentence -> 0}, []}, {token, 37, 38, is, {sentence -> 0}, []}, {token, 40, 42, the, {sentence -> 0}, []}, {token, 44, 56, Mediterranean, {sentence -> 0}, []}, {token, 58, 66, coastline, {sentence -> 0}, []}, {token, 68, 69, of, {sentence -> 0}, []}, {token, 71, 73, the, {sentence -> 0}, []}, {token, 75, 83, southeast, {sentence -> 0}, []}, {token, 85, 90, corner, {sentence -> 0}, []}, {token, 92, 93, of, {sentence -> 0}, []}, {token, 95, 100, France, {sentence -> 0}, []}, {token, 101, 101, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {sentence -> 0, token -> Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., pieceId -> -1, isWordStart -> true}, [-0.008019538, -0.20794445, -0.22693227, -0.066497125, 0.13135312, 0.18650925, 0.26249522, -0.078561835, -0.108032554, -0.16549937, 0.23456018, -0.0029051038, -0.09654328, 0.1081486, -0.12367482, 0.48534602, 0.21623878, -0.45364743, 0.019192278, -0.02995625, -0.2504148, 0.050217465, 0.4518648, 0.32667267, 0.10702716, 0.0853897, -0.14142184, -0.03167035, 0.19115756, 0.24786223, 0.2914291, 0.05494155, 0.10942929, 0.24128707, -0.25258842, 0.06971938, -0.30623087, 0.009310224, 0.24930832, -0.21145473, -0.071628265, 0.16028066, 0.2130934, -0.15951225, -0.12180848, 0.41019738, 0.2232628, 0.035024345, -0.1233715, -0.09511672, -0.37004888, 0.3324171, 0.28777122, 0.19508287, -0.0010060468, 0.033361237, -0.15824829, 0.23507485, -0.07277971, -0.10862046, -0.1266775, -0.20873529, -0.016099958, -0.054443464, 0.03418388, -0.15287445, 0.08516955, -0.16009997, -0.13519952, 0.06594492, -0.09619981, 0.13498701, 0.17068765, 
-0.27748087, -0.27755773, 0.050838053, -0.60263795, -0.110697724, 0.29348016, 0.43039024, -0.10951068, 0.18981335, 0.045793816, 0.20932128, -0.013209117, -0.045443077, -0.04795611, -0.09545004, 0.18174982, 0.27740657, -0.20092762, -0.3743948, 0.070061244, 0.021248285, -0.10003458, -0.012923969, -0.015641354, -0.069142535, -0.18447016, -0.20632346, 0.061669473, -0.26073086, -0.13162859, 0.2652085, -0.020487502, -0.20239983, -0.019889005, 0.28115276, 0.06268703, -0.10772411, -0.17282209, 0.42130953, 0.3386013, 0.01063591, 0.012493721, 0.18252686, 0.11687154, -0.289261, 0.41447076, -0.3171783, -0.007732441, -0.12554668, 0.12227292, 0.14982867, -0.1994947, 0.2653794, 0.13194634, 0.27858624, 0.18165465, 0.09452338, -0.036926605, 0.14094561, -0.10714303, 0.14023595, 0.21794924, 0.11931853, 0.0083964225, -0.34428638, -0.2060787, 0.2581659, 0.34725285, 0.17249854, -0.035370246, 0.18123226, 0.115042284, 0.21278691, 0.16332792, -0.39327794, 0.043199714, 0.3466151, 0.10552055, 0.1811821, -0.094810925, -0.25144812, -0.2369235, -0.098048225, 0.053571597, -0.3044636, -0.117047854, 0.37140828, 0.03258837, 0.010649239, -0.1384685, -0.2107958, -0.04297339, -0.11824401, 0.022123057, 0.09906902, -0.08942813, -0.40615627, -0.09174467, -0.54138595, -0.12516883, 0.20398387, -0.3002764, 0.23201002, -0.2881544, 0.09412716, 0.38156128, 0.027156925, 0.008762595, -0.18784012, -0.022351125, 0.12036052, 0.3264586, 0.24900058, -0.39910597, 0.079319164, 0.15601031, 0.23772217, 0.13825494, -0.04249709, -0.11666459, 0.12905037, -0.19663046, 0.1725472, -0.22609569, 0.18567988, -0.2612912, -0.21441153, 0.29490396, -0.4131635, -0.044802688, 0.07255378, 0.25608072, 0.004686255, -0.05354772, -0.096422195, 0.10385672, 0.16806516, 0.16490765, -0.40542477, 0.27064338, -0.036462914, -0.031986397, -0.029910486, 0.1873078, 0.25759354, 0.10744408, -0.38749996, -0.13174862, 0.09937129, 0.26613194, -0.22159924, 0.18355416, -0.2635393, -0.37645352, -0.12610617, 0.20294958, 0.21151984, 0.16929722, -0.26906973, 
0.17143078, -0.10153495, -0.42571825, -0.36661187, -0.102374844, 0.22459105, 0.19933926, 0.19093192, 0.24899375, 0.014875878, 0.12345995, 0.17558807, 0.16242947, -0.14835785, 0.172079, -0.36210755, -0.040536694, -0.2759419, -0.20727508, -0.23599353, 0.38549605, -0.20905586, 0.23385757, 0.40176284, -0.31578505, -0.086200155, 0.14503288, 0.087825194, 0.05920683, -0.11740758, 0.21260233, 0.19665754, -0.10235794, 0.258057, -0.014308026, 0.25915918, 0.16674384, 0.10508859, 0.11316951, 0.12858894, -0.15207034, 0.044372506, 0.010741287, -0.026460659, -0.23678094, -0.13959345, 0.22005278, -0.046174966, 0.029175762, -0.15699074, -0.07535748, 0.009295726, 0.39943105, -0.3637203, 0.26161516, 0.08734801, 0.14933671, -0.2341384, -0.21599188, 0.072215594, 0.18299405, -0.39975247, 0.0035565842, 0.14676017, 0.10756498, 0.22721377, 0.25173554, -0.006123567, -0.094908014, 0.4766384, -0.14474754, -0.13256605, 0.2748037, -0.24513212, -0.2763986, 0.2767801, -0.02837765, 0.3070326, 0.14310399, 0.02803333, 0.07966456, -0.59598786, 0.0634747, -0.46667746, 2.0662788E-4, 0.027827349, -0.09846748, -0.2049167, 0.14928561, 0.28899723, -0.24984509, -0.027508143, 0.21127011, 0.07993328, -0.10535655, 0.49076062, -0.0016710946, 0.22241512, -0.0761468, 0.26699233, -0.2132116, 0.26694208, -0.2699361, -0.102920294, 0.023504116, 0.08591295, 0.047616553, -0.03864741, -0.32397145, 0.22423643, 3.750706E-4, -0.052303057, -0.051092364, 0.10266755, 0.007874346, 0.056537174, 0.052040357, 0.3099191, 0.2147011, -0.041306116, -0.37230852, -0.02358203, -0.093814276, 0.06317558, 0.021807505, -0.0052324715, 0.44351, -0.084359095, 0.02457905, -0.120074786, 0.25709224, 0.20523037, 0.13982376, 0.12155931, 0.085087486, 0.15450434, -0.033903852, -0.008247593, -0.14950626, -0.22513278, -0.28045303, 0.20739494, -0.22394833, -0.16329561, 0.18090749, 0.21472976, -0.119760044, 0.122836865, 0.31932604, 0.10612227, -0.17157494, 0.2600004, -0.09567128, 0.08662764, 0.31872585, -0.032042537, 0.16470939, 0.50351644, 0.21122353, 
-0.36921608, -0.033876106, -0.22228853, -0.01841526, 0.26909, -0.15758824, 0.18417774, 0.3548506, 0.31725127, 0.43062854, 0.01227282, -0.12594888, 0.08146904, 0.21303466, 0.022855874, -0.1579743, -0.15339917, 0.2802583, 0.056297414, -0.14544499, -0.03212101, -0.14632796, 0.014579217, -0.13752632, -0.41132227, 0.04383321, 0.21508393, -0.4907415, 0.094107084, -0.31429338, 0.046641294, -0.2345635, 0.21059625, -0.24306759, -0.1063646, 0.3705863, -0.09811044, 0.0605867, -0.18104103, -0.14497119, 0.029493399, 0.03492019, -0.029783124, -0.013816307, 0.35751185, -0.10965113, 0.041228097, 0.013336186, 0.20357218, -0.069435954, 0.18225056, 0.027941927, -0.15454903, -0.3813045, 0.15006964, -0.19400018, -0.43550804, -0.36777005, 0.36024705, -0.1553815, -0.25671393, -0.2242871, -0.25538933, 0.05633095, 0.17045896, 0.46840438, -0.40827343, -0.06883438, 0.49420673, -0.054230284, -0.1784511, 0.3003717, 0.20356534, -0.30255544, 0.32934627, 0.28107437, -0.043982994, 0.030254403, 0.50613445, 0.106514424, 0.16891049, -0.21837108, 0.43107754, -0.22679833, 0.3222671, -0.14380512, -0.22103326, -0.20031895, -0.0053463904, 0.31086656, 0.18283913, -0.4164982, -0.10497948, 0.05488763, 0.3499168, -0.38303393, -0.0706, 0.02012401, -0.34419113, 0.14620271, 0.06066406, 0.23910847, -0.36826327, -0.0031818626, 0.39364952, -0.31577078, 0.13167827, 0.32565027, 0.09713475, 0.35657045, -0.031938132, 0.0063300836, 0.058500852, -0.24127181, -0.009309541, 0.1374363, 0.55266875, 0.15968034, -0.386893, 0.09495158, 0.24035685, -0.16255692, 0.29488644, -0.079560675, -0.06405346, 0.25573367, -0.031631544, 0.15875565, -0.089678556, -0.22789505, -0.30908328, 0.37180826, -0.20641276, -0.10318238, -0.16293754, -0.08793884, -0.13343672, 0.06730986, -0.36669317, 0.3368156, 0.12109322, -0.19045906, -0.08927799, -0.06881458, -0.15314971, -0.2044369, -0.2583901, 0.4240869, -0.16738205, -0.45303494, 0.2660113, 0.036534272, 0.34492457, 0.03064578, 0.1161642, -0.07246866, 0.14535797, 0.094102405, -0.13236499, 0.281681, 
0.06910245, -0.5319742, -0.14167535, -0.2213673, 0.10062441, 0.1823384, -0.32449323, 0.045420066, 0.020613493, 0.14170578, 0.054557852, -0.081435464, -0.08237418, 0.39456445, 0.22010934, 0.27721396, 0.079628035, 0.21498804, -0.030950392, -0.3197281, 0.047628313, 0.06835473, -0.19502498, 0.4497397, -0.07986492, -0.37665737, -0.06270121, 0.40801352, 0.087262504, 0.013305801, -0.051705398, 0.20942184, 0.1442438, -0.12746692, 0.1790049, -0.019305615, -0.118932344, -0.10855935, 0.07830948, -0.213931, 0.03763846, -0.16343854, -0.022269689, -0.22339572, 0.03815981, -0.21411316, 0.26194704, -0.32970122, 0.09410265, 0.07144776, 0.28778374, -0.3441213, -0.16373956, -0.07426902, 0.160929, 0.27774033, 0.3449314, 0.032607112, 0.01710335, -0.1717972, -0.2522701, 0.056452833, -0.20888108, 0.15573634, 0.07741951, 0.21761471, -0.3139925, -0.18512204, 0.19587885, -0.093458064, -0.15175745, 0.39264217, 0.24714977, 0.23077905, 0.011358897, 0.2595526, 0.011583311, -0.18557008, -0.14353761, -0.25729296, 0.07521876, -0.096184544, -0.040979125, -0.05232608, -0.11970768, -0.21794261, -0.15748894, 0.14131306, 0.12565142, 0.013618528, -0.042706285, -0.009846034, -0.26986337, 0.32127848, 0.012022808, 0.053931307, -0.053089615, 0.04204643, -0.14650013, 0.2337747, 0.22185645, 0.084541485, -0.18815027, -0.05060835, -0.26873425, -0.34384862, 0.04447537, 0.14540635, 0.10909279, -0.07547726, -0.24596812, 0.01701418, -0.12477274, 0.17758216, -0.003989822, -0.15708753, -0.10013234, -0.060486514, -0.023525922, 0.07570741, -0.2043602, -0.20318846, -0.09272064, -0.08396323, -0.06631228, 0.34927878, -0.042691473, 0.30072594, -0.14322798, -0.0010577674, -0.16078945, 0.10943518, -0.065818414, 0.048736267, 0.27629223, -0.46065903, -0.15741968, -0.00574388, -0.20248723, -0.13032655, -0.075273484, -0.019586748, 0.22265866, -0.3558014, 0.2260905, -0.096864544, 0.18844095, -0.07203457, -0.2636315, -0.14861545, 0.0066989767, 0.24385743, -0.34070122, -0.25452897, -0.25891387, -0.09419047, -0.07291724, 
-0.24225563, 0.4279058, -0.11666133, -0.07513852, 0.0018023223, 0.448046, 0.20821133, 0.15960689, 0.228467, -0.026781378, 0.043798983, 0.11893418, -0.46809036, 0.2198233, -0.23621814, -0.1333021, 0.0064931093, 0.09569369, -0.024951685, 0.041569363, -0.11953565, -0.09252381, 0.22401738, -0.36581153, -0.070181325, 0.27557525, 0.16817227, -0.26221034, 0.036853302, 0.122759596, 0.38257086, 0.08625116, -0.2152178, 0.11895082, -0.33421177, -0.027113844, -0.18887155, -0.29267368, 0.14328273, -0.077010274, 0.08095182, -0.09670088, -0.29266798, 0.21065508, -0.058506, -0.08751463, 0.4218339, -0.0031686411, -0.12668662, 0.13693617, 0.010775516, 0.03800761, -0.101201124, 0.26593548, 0.20864183, -0.28382388, 0.15653086, -0.12020002, -0.031083556, -0.09614674]}]|[{doc_similarity_rankings, 0, 101, Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., {pieceId -> -1, lshId -> -1548374770, isWordStart -> true, token -> Seventh document, the French Riviera is the Mediterranean coastline of the southeast corner of France., lshNeighbors -> [(-1719102856,0.2329717161223739),(1274183715,0.3181712969830674),(-612640902,0.31909423657258823),(1293373212,0.3225589844685982),(-1320876223,0.32312628638943774),(1634839239,0.39013800843493296),(1510101612,0.41026101952006294)], sentence -> 0}, [-0.008019538, -0.20794445, -0.22693227, -0.066497125, 0.13135312, 0.18650925, 0.26249522, -0.078561835, -0.108032554, -0.16549937, 0.23456018, -0.0029051038, -0.09654328, 0.1081486, -0.12367482, 0.48534602, 0.21623878, -0.45364743, 0.019192278, -0.02995625, -0.2504148, 0.050217465, 0.4518648, 0.32667267, 0.10702716, 0.0853897, -0.14142184, -0.03167035, 0.19115756, 0.24786223, 0.2914291, 0.05494155, 0.10942929, 0.24128707, -0.25258842, 0.06971938, -0.30623087, 0.009310224, 0.24930832, -0.21145473, -0.071628265, 0.16028066, 0.2130934, -0.15951225, -0.12180848, 0.41019738, 0.2232628, 0.035024345, -0.1233715, -0.09511672, -0.37004888, 0.3324171, 0.28777122, 
0.19508287, -0.0010060468, 0.033361237, -0.15824829, 0.23507485, -0.07277971, -0.10862046, -0.1266775, -0.20873529, -0.016099958, -0.054443464, 0.03418388, -0.15287445, 0.08516955, -0.16009997, -0.13519952, 0.06594492, -0.09619981, 0.13498701, 0.17068765, -0.27748087, -0.27755773, 0.050838053, -0.60263795, -0.110697724, 0.29348016, 0.43039024, -0.10951068, 0.18981335, 0.045793816, 0.20932128, -0.013209117, -0.045443077, -0.04795611, -0.09545004, 0.18174982, 0.27740657, -0.20092762, -0.3743948, 0.070061244, 0.021248285, -0.10003458, -0.012923969, -0.015641354, -0.069142535, -0.18447016, -0.20632346, 0.061669473, -0.26073086, -0.13162859, 0.2652085, -0.020487502, -0.20239983, -0.019889005, 0.28115276, 0.06268703, -0.10772411, -0.17282209, 0.42130953, 0.3386013, 0.01063591, 0.012493721, 0.18252686, 0.11687154, -0.289261, 0.41447076, -0.3171783, -0.007732441, -0.12554668, 0.12227292, 0.14982867, -0.1994947, 0.2653794, 0.13194634, 0.27858624, 0.18165465, 0.09452338, -0.036926605, 0.14094561, -0.10714303, 0.14023595, 0.21794924, 0.11931853, 0.0083964225, -0.34428638, -0.2060787, 0.2581659, 0.34725285, 0.17249854, -0.035370246, 0.18123226, 0.115042284, 0.21278691, 0.16332792, -0.39327794, 0.043199714, 0.3466151, 0.10552055, 0.1811821, -0.094810925, -0.25144812, -0.2369235, -0.098048225, 0.053571597, -0.3044636, -0.117047854, 0.37140828, 0.03258837, 0.010649239, -0.1384685, -0.2107958, -0.04297339, -0.11824401, 0.022123057, 0.09906902, -0.08942813, -0.40615627, -0.09174467, -0.54138595, -0.12516883, 0.20398387, -0.3002764, 0.23201002, -0.2881544, 0.09412716, 0.38156128, 0.027156925, 0.008762595, -0.18784012, -0.022351125, 0.12036052, 0.3264586, 0.24900058, -0.39910597, 0.079319164, 0.15601031, 0.23772217, 0.13825494, -0.04249709, -0.11666459, 0.12905037, -0.19663046, 0.1725472, -0.22609569, 0.18567988, -0.2612912, -0.21441153, 0.29490396, -0.4131635, -0.044802688, 0.07255378, 0.25608072, 0.004686255, -0.05354772, -0.096422195, 0.10385672, 0.16806516, 0.16490765, 
-0.40542477, 0.27064338, -0.036462914, -0.031986397, -0.029910486, 0.1873078, 0.25759354, 0.10744408, -0.38749996, -0.13174862, 0.09937129, 0.26613194, -0.22159924, 0.18355416, -0.2635393, -0.37645352, -0.12610617, 0.20294958, 0.21151984, 0.16929722, -0.26906973, 0.17143078, -0.10153495, -0.42571825, -0.36661187, -0.102374844, 0.22459105, 0.19933926, 0.19093192, 0.24899375, 0.014875878, 0.12345995, 0.17558807, 0.16242947, -0.14835785, 0.172079, -0.36210755, -0.040536694, -0.2759419, -0.20727508, -0.23599353, 0.38549605, -0.20905586, 0.23385757, 0.40176284, -0.31578505, -0.086200155, 0.14503288, 0.087825194, 0.05920683, -0.11740758, 0.21260233, 0.19665754, -0.10235794, 0.258057, -0.014308026, 0.25915918, 0.16674384, 0.10508859, 0.11316951, 0.12858894, -0.15207034, 0.044372506, 0.010741287, -0.026460659, -0.23678094, -0.13959345, 0.22005278, -0.046174966, 0.029175762, -0.15699074, -0.07535748, 0.009295726, 0.39943105, -0.3637203, 0.26161516, 0.08734801, 0.14933671, -0.2341384, -0.21599188, 0.072215594, 0.18299405, -0.39975247, 0.0035565842, 0.14676017, 0.10756498, 0.22721377, 0.25173554, -0.006123567, -0.094908014, 0.4766384, -0.14474754, -0.13256605, 0.2748037, -0.24513212, -0.2763986, 0.2767801, -0.02837765, 0.3070326, 0.14310399, 0.02803333, 0.07966456, -0.59598786, 0.0634747, -0.46667746, 2.0662788E-4, 0.027827349, -0.09846748, -0.2049167, 0.14928561, 0.28899723, -0.24984509, -0.027508143, 0.21127011, 0.07993328, -0.10535655, 0.49076062, -0.0016710946, 0.22241512, -0.0761468, 0.26699233, -0.2132116, 0.26694208, -0.2699361, -0.102920294, 0.023504116, 0.08591295, 0.047616553, -0.03864741, -0.32397145, 0.22423643, 3.750706E-4, -0.052303057, -0.051092364, 0.10266755, 0.007874346, 0.056537174, 0.052040357, 0.3099191, 0.2147011, -0.041306116, -0.37230852, -0.02358203, -0.093814276, 0.06317558, 0.021807505, -0.0052324715, 0.44351, -0.084359095, 0.02457905, -0.120074786, 0.25709224, 0.20523037, 0.13982376, 0.12155931, 0.085087486, 0.15450434, -0.033903852, -0.008247593, 
-0.14950626, -0.22513278, -0.28045303, 0.20739494, -0.22394833, -0.16329561, 0.18090749, 0.21472976, -0.119760044, 0.122836865, 0.31932604, 0.10612227, -0.17157494, 0.2600004, -0.09567128, 0.08662764, 0.31872585, -0.032042537, 0.16470939, 0.50351644, 0.21122353, -0.36921608, -0.033876106, -0.22228853, -0.01841526, 0.26909, -0.15758824, 0.18417774, 0.3548506, 0.31725127, 0.43062854, 0.01227282, -0.12594888, 0.08146904, 0.21303466, 0.022855874, -0.1579743, -0.15339917, 0.2802583, 0.056297414, -0.14544499, -0.03212101, -0.14632796, 0.014579217, -0.13752632, -0.41132227, 0.04383321, 0.21508393, -0.4907415, 0.094107084, -0.31429338, 0.046641294, -0.2345635, 0.21059625, -0.24306759, -0.1063646, 0.3705863, -0.09811044, 0.0605867, -0.18104103, -0.14497119, 0.029493399, 0.03492019, -0.029783124, -0.013816307, 0.35751185, -0.10965113, 0.041228097, 0.013336186, 0.20357218, -0.069435954, 0.18225056, 0.027941927, -0.15454903, -0.3813045, 0.15006964, -0.19400018, -0.43550804, -0.36777005, 0.36024705, -0.1553815, -0.25671393, -0.2242871, -0.25538933, 0.05633095, 0.17045896, 0.46840438, -0.40827343, -0.06883438, 0.49420673, -0.054230284, -0.1784511, 0.3003717, 0.20356534, -0.30255544, 0.32934627, 0.28107437, -0.043982994, 0.030254403, 0.50613445, 0.106514424, 0.16891049, -0.21837108, 0.43107754, -0.22679833, 0.3222671, -0.14380512, -0.22103326, -0.20031895, -0.0053463904, 0.31086656, 0.18283913, -0.4164982, -0.10497948, 0.05488763, 0.3499168, -0.38303393, -0.0706, 0.02012401, -0.34419113, 0.14620271, 0.06066406, 0.23910847, -0.36826327, -0.0031818626, 0.39364952, -0.31577078, 0.13167827, 0.32565027, 0.09713475, 0.35657045, -0.031938132, 0.0063300836, 0.058500852, -0.24127181, -0.009309541, 0.1374363, 0.55266875, 0.15968034, -0.386893, 0.09495158, 0.24035685, -0.16255692, 0.29488644, -0.079560675, -0.06405346, 0.25573367, -0.031631544, 0.15875565, -0.089678556, -0.22789505, -0.30908328, 0.37180826, -0.20641276, -0.10318238, -0.16293754, -0.08793884, -0.13343672, 0.06730986, 
-0.36669317, 0.3368156, 0.12109322, -0.19045906, -0.08927799, -0.06881458, -0.15314971, -0.2044369, -0.2583901, 0.4240869, -0.16738205, -0.45303494, 0.2660113, 0.036534272, 0.34492457, 0.03064578, 0.1161642, -0.07246866, 0.14535797, 0.094102405, -0.13236499, 0.281681, 0.06910245, -0.5319742, -0.14167535, -0.2213673, 0.10062441, 0.1823384, -0.32449323, 0.045420066, 0.020613493, 0.14170578, 0.054557852, -0.081435464, -0.08237418, 0.39456445, 0.22010934, 0.27721396, 0.079628035, 0.21498804, -0.030950392, -0.3197281, 0.047628313, 0.06835473, -0.19502498, 0.4497397, -0.07986492, -0.37665737, -0.06270121, 0.40801352, 0.087262504, 0.013305801, -0.051705398, 0.20942184, 0.1442438, -0.12746692, 0.1790049, -0.019305615, -0.118932344, -0.10855935, 0.07830948, -0.213931, 0.03763846, -0.16343854, -0.022269689, -0.22339572, 0.03815981, -0.21411316, 0.26194704, -0.32970122, 0.09410265, 0.07144776, 0.28778374, -0.3441213, -0.16373956, -0.07426902, 0.160929, 0.27774033, 0.3449314, 0.032607112, 0.01710335, -0.1717972, -0.2522701, 0.056452833, -0.20888108, 0.15573634, 0.07741951, 0.21761471, -0.3139925, -0.18512204, 0.19587885, -0.093458064, -0.15175745, 0.39264217, 0.24714977, 0.23077905, 0.011358897, 0.2595526, 0.011583311, -0.18557008, -0.14353761, -0.25729296, 0.07521876, -0.096184544, -0.040979125, -0.05232608, -0.11970768, -0.21794261, -0.15748894, 0.14131306, 0.12565142, 0.013618528, -0.042706285, -0.009846034, -0.26986337, 0.32127848, 0.012022808, 0.053931307, -0.053089615, 0.04204643, -0.14650013, 0.2337747, 0.22185645, 0.084541485, -0.18815027, -0.05060835, -0.26873425, -0.34384862, 0.04447537, 0.14540635, 0.10909279, -0.07547726, -0.24596812, 0.01701418, -0.12477274, 0.17758216, -0.003989822, -0.15708753, -0.10013234, -0.060486514, -0.023525922, 0.07570741, -0.2043602, -0.20318846, -0.09272064, -0.08396323, -0.06631228, 0.34927878, -0.042691473, 0.30072594, -0.14322798, -0.0010577674, -0.16078945, 0.10943518, -0.065818414, 0.048736267, 0.27629223, -0.46065903, -0.15741968, 
-0.00574388, -0.20248723, -0.13032655, -0.075273484, -0.019586748, 0.22265866, -0.3558014, 0.2260905, -0.096864544, 0.18844095, -0.07203457, -0.2636315, -0.14861545, 0.0066989767, 0.24385743, -0.34070122, -0.25452897, -0.25891387, -0.09419047, -0.07291724, -0.24225563, 0.4279058, -0.11666133, -0.07513852, 0.0018023223, 0.448046, 0.20821133, 0.15960689, 0.228467, -0.026781378, 0.043798983, 0.11893418, -0.46809036, 0.2198233, -0.23621814, -0.1333021, 0.0064931093, 0.09569369, -0.024951685, 0.041569363, -0.11953565, -0.09252381, 0.22401738, -0.36581153, -0.070181325, 0.27557525, 0.16817227, -0.26221034, 0.036853302, 0.122759596, 0.38257086, 0.08625116, -0.2152178, 0.11895082, -0.33421177, -0.027113844, -0.18887155, -0.29267368, 0.14328273, -0.077010274, 0.08095182, -0.09670088, -0.29266798, 0.21065508, -0.058506, -0.08751463, 0.4218339, -0.0031686411, -0.12668662, 0.13693617, 0.010775516, 0.03800761, -0.101201124, 0.26593548, 0.20864183, -0.28382388, 0.15653086, -0.12020002, -0.031083556, -0.09614674]}]|-1548374770 |[(-1719102856,0.2329717161223739),(1274183715,0.3181712969830674),(-612640902,0.31909423657258823),(1293373212,0.3225589844685982),(-1320876223,0.32312628638943774),(1634839239,0.39013800843493296),(1510101612,0.41026101952006294)] |-1719102856 |0.2329717161223739 |\n", - "|Eighth document, the warmest place in France is the French Riviera coast in Southern France. 
|[{document, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {sentence -> 0}, []}] |[{document, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {sentence -> 0}, []}] |[{token, 0, 5, Eighth, {sentence -> 0}, []}, {token, 7, 14, document, {sentence -> 0}, []}, {token, 15, 15, ,, {sentence -> 0}, []}, {token, 17, 19, the, {sentence -> 0}, []}, {token, 21, 27, warmest, {sentence -> 0}, []}, {token, 29, 33, place, {sentence -> 0}, []}, {token, 35, 36, in, {sentence -> 0}, []}, {token, 38, 43, France, {sentence -> 0}, []}, {token, 45, 46, is, {sentence -> 0}, []}, {token, 48, 50, the, {sentence -> 0}, []}, {token, 52, 57, French, {sentence -> 0}, []}, {token, 59, 65, Riviera, {sentence -> 0}, []}, {token, 67, 71, coast, {sentence -> 0}, []}, {token, 73, 74, in, {sentence -> 0}, []}, {token, 76, 83, Southern, {sentence -> 0}, []}, {token, 85, 90, France, {sentence -> 0}, []}, {token, 91, 91, ., {sentence -> 0}, []}] |[{sentence_embeddings, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {sentence -> 0, token -> Eighth document, the warmest place in France is the French Riviera coast in Southern France., pieceId -> -1, isWordStart -> true}, [-0.016810948, -0.2047661, -0.2230267, -0.06613865, 0.13198656, 0.1876761, 0.26010045, -0.07860814, -0.08750986, -0.16776286, 0.2276038, -0.0048067835, -0.09549664, 0.09228111, -0.13566737, 0.49211678, 0.21506462, -0.46096098, 0.02290908, -0.030406667, -0.26267675, 0.06007962, 0.45803392, 0.32293695, 0.114533946, 0.08458777, -0.14395903, -0.04171004, 0.19107072, 0.24035561, 0.30237022, 0.06198846, 0.1061096, 0.23064552, -0.24602959, 0.06736374, -0.30546662, 0.018916616, 0.23576747, -0.20862201, -0.07291601, 0.16750018, 0.21796244, -0.15457813, -0.10267717, 0.4108639, 0.22948943, 0.024940656, -0.12781665, -0.09829003, -0.36444336, 0.33999962, 0.2979673, 0.18795507, 0.0058128177, 
0.03166188, -0.16602582, 0.23594972, -0.08020127, -0.10588683, -0.13476343, -0.20074773, -0.018953657, -0.049433485, 0.024178218, -0.15827763, 0.08096834, -0.16638803, -0.13942112, 0.066614725, -0.088811725, 0.13083075, 0.16235103, -0.28993893, -0.27632293, 0.050675448, -0.60434693, -0.11318788, 0.28318927, 0.4251388, -0.115691744, 0.20609955, 0.048247, 0.22572593, -0.009609341, -0.047968842, -0.063365586, -0.09246086, 0.17824914, 0.27709022, -0.19850928, -0.3818279, 0.076068364, 0.021581797, -0.10875837, -0.012900721, -0.02160152, -0.07872134, -0.18224761, -0.19908729, 0.06501623, -0.26073945, -0.13469686, 0.26163143, -0.016409602, -0.19890097, -0.027429793, 0.29457363, 0.06711283, -0.11308402, -0.16379794, 0.42119157, 0.33880904, 0.006702892, 0.011503242, 0.17496394, 0.11048721, -0.29479164, 0.41352564, -0.31115502, -0.014682422, -0.12465822, 0.122917116, 0.15077394, -0.20986927, 0.26472732, 0.12745324, 0.28114912, 0.17852971, 0.09809405, -0.021323938, 0.1425695, -0.116022065, 0.15450992, 0.21419188, 0.10874285, -0.0035544126, -0.33577085, -0.21062046, 0.2557292, 0.365727, 0.17186679, -0.034007914, 0.18571931, 0.113826446, 0.2041972, 0.16858879, -0.39305702, 0.047708813, 0.3407281, 0.10008117, 0.1666761, -0.08277356, -0.2655564, -0.2390036, -0.08099706, 0.045763668, -0.30731857, -0.11569927, 0.37365586, 0.01956875, -0.001237718, -0.14353643, -0.21356548, -0.03486019, -0.12086741, 0.041639853, 0.104439534, -0.092656165, -0.40179342, -0.10187488, -0.5471143, -0.12290574, 0.2087677, -0.30770865, 0.22984694, -0.29476187, 0.096242994, 0.384363, 0.026361842, 5.725033E-4, -0.19238421, -0.028222995, 0.113249354, 0.32910722, 0.23294336, -0.3986335, 0.09978031, 0.14896542, 0.23949988, 0.1478057, -0.032381695, -0.12643869, 0.115882315, -0.19580248, 0.19138065, -0.21877433, 0.19352977, -0.2535542, -0.20547706, 0.27981648, -0.4005575, -0.046523586, 0.09361415, 0.2590049, 0.007964988, -0.05637875, -0.08670184, 0.099779375, 0.18480189, 0.14401811, -0.39154035, 0.2751374, 
-0.03074833, -0.024801074, -0.021942627, 0.18419088, 0.2608532, 0.11138497, -0.40339246, -0.13331044, 0.1154039, 0.2723197, -0.2262617, 0.16250908, -0.26903376, -0.37250632, -0.13596842, 0.21473151, 0.21515769, 0.16038711, -0.27683273, 0.17736936, -0.08310452, -0.42004582, -0.36562136, -0.09779574, 0.2411314, 0.20187439, 0.18733725, 0.2641376, 0.017855817, 0.11153809, 0.17190574, 0.16802579, -0.16192591, 0.18056063, -0.35846385, -0.049806055, -0.26109663, -0.19828144, -0.23734608, 0.3932265, -0.22376418, 0.22467436, 0.38326305, -0.31592938, -0.081319235, 0.1556078, 0.08884176, 0.06665615, -0.10425053, 0.20910178, 0.17956693, -0.10533009, 0.24591704, -0.0038596322, 0.2617894, 0.18107952, 0.09749653, 0.12563631, 0.12635578, -0.1391452, 0.041367147, 0.008686017, -0.029572926, -0.24497731, -0.14718057, 0.21611099, -0.03282076, 0.03800035, -0.16256967, -0.09790739, -0.0014242514, 0.4040815, -0.37026706, 0.26380262, 0.09079506, 0.14612387, -0.24610792, -0.1947632, 0.07052605, 0.18185152, -0.40230885, -0.007907403, 0.15706225, 0.111771695, 0.23515886, 0.25010493, -0.01306646, -0.10541734, 0.4784001, -0.14441222, -0.13855816, 0.27101827, -0.24725674, -0.2802681, 0.27687818, -0.03339839, 0.3054517, 0.130619, 0.037950855, 0.077942155, -0.5888695, 0.07351768, -0.46685404, -0.0040499587, 0.025250355, -0.0859045, -0.20816529, 0.14463536, 0.28113353, -0.25993484, -0.040858176, 0.21026222, 0.08361061, -0.10720821, 0.48898703, -2.2704061E-4, 0.2221854, -0.08727743, 0.2570495, -0.20949613, 0.25267476, -0.27079397, -0.09415934, 0.006743326, 0.09153167, 0.053167544, -0.03806283, -0.3219283, 0.22196239, 0.0017300758, -0.057169266, -0.05333144, 0.0975005, 0.005131098, 0.046497438, 0.0623432, 0.321922, 0.21922378, -0.03954325, -0.37501228, -0.0155652305, -0.09921332, 0.06552464, 0.02618605, -0.014657838, 0.4420349, -0.08608749, 0.028822435, -0.132511, 0.26999778, 0.20299375, 0.1389036, 0.12914367, 0.07623987, 0.14375348, -0.052015696, -0.018790662, -0.13280135, -0.21133803, -0.27303988, 
0.20791331, -0.22594361, -0.17630367, 0.18414178, 0.21877678, -0.12745881, 0.13150722, 0.32212988, 0.11044695, -0.16207896, 0.27268118, -0.099328026, 0.08867667, 0.31989032, -0.015583255, 0.16372082, 0.51543236, 0.20965122, -0.37761936, -0.029446285, -0.22624405, -0.0051141595, 0.26030782, -0.16369255, 0.1629463, 0.36997578, 0.31189638, 0.42942852, -0.001673679, -0.1205522, 0.08194525, 0.19638115, 0.011460368, -0.16485056, -0.16228262, 0.2740312, 0.05017268, -0.15451404, -0.03512774, -0.13452254, 0.028118514, -0.1329012, -0.4101204, 0.03879618, 0.21135166, -0.4898482, 0.091599375, -0.3092855, 0.06110656, -0.22477996, 0.21057707, -0.23781657, -0.10551279, 0.37984648, -0.09410357, 0.04732256, -0.18294896, -0.15246752, 0.021639392, 0.02237629, -0.017262453, -0.026302386, 0.3608514, -0.108894534, 0.0474644, 0.024507962, 0.1971899, -0.06268896, 0.19199464, 0.032802872, -0.13034374, -0.3862199, 0.15018144, -0.20086884, -0.4251439, -0.3633213, 0.35504803, -0.15731166, -0.25986132, -0.22435285, -0.26186633, 0.070692174, 0.16790512, 0.4691279, -0.39221457, -0.07939644, 0.4884994, -0.045310415, -0.18310241, 0.29507643, 0.2049564, -0.31412512, 0.3319548, 0.26859912, -0.046015155, 0.017437246, 0.50600624, 0.13420314, 0.16892372, -0.2181584, 0.43435982, -0.22135681, 0.32880262, -0.15483025, -0.22297119, -0.19834407, -0.013414336, 0.3219674, 0.1825749, -0.42209828, -0.10438974, 0.037324775, 0.361452, -0.37822384, -0.065436505, 0.0033893238, -0.33716473, 0.14011075, 0.061164405, 0.22412765, -0.37722385, -0.003241484, 0.39605972, -0.32297456, 0.121618845, 0.31544292, 0.098115414, 0.3545687, -0.040462196, -0.0015956911, 0.05998545, -0.2341839, -0.012709303, 0.12954898, 0.55812806, 0.15784661, -0.38522407, 0.10765506, 0.24407583, -0.16438684, 0.29567552, -0.087225564, -0.05044142, 0.26510367, -0.03295447, 0.1525916, -0.09594085, -0.23619364, -0.305938, 0.369586, -0.2030003, -0.10078699, -0.158635, -0.09669019, -0.13501357, 0.06365931, -0.3862268, 0.33531812, 0.13119636, -0.19266993, 
-0.08529151, -0.068182945, -0.14552347, -0.20925495, -0.26174715, 0.42169508, -0.15332408, -0.4422107, 0.26490703, 0.044914585, 0.3462565, 0.028742079, 0.112568825, -0.049105942, 0.14644153, 0.10022157, -0.1353474, 0.2859512, 0.05943501, -0.5361389, -0.14724284, -0.22212745, 0.09529675, 0.19780394, -0.3280398, 0.03847211, 0.047032848, 0.13924433, 0.04658549, -0.087256454, -0.06858529, 0.39810196, 0.2195755, 0.27559587, 0.080366105, 0.224659, -0.037330583, -0.32662335, 0.049084876, 0.08356128, -0.2064612, 0.44953158, -0.07670108, -0.3735964, -0.07003661, 0.40409234, 0.09436887, 0.002322631, -0.058938935, 0.21089995, 0.15257117, -0.11534224, 0.1868882, -0.027790006, -0.13924241, -0.11449168, 0.07226305, -0.2269321, 0.05748148, -0.15779555, -0.004239961, -0.2076957, 0.025448453, -0.20258994, 0.26121607, -0.3375632, 0.09614588, 0.0837002, 0.2889494, -0.35512874, -0.16446145, -0.07327044, 0.162409, 0.27919483, 0.33874825, 0.03656232, 0.020300211, -0.18202537, -0.24737185, 0.062216822, -0.20302315, 0.13892165, 0.07089765, 0.22756256, -0.33009684, -0.18666261, 0.20215912, -0.09404198, -0.14015506, 0.3908819, 0.23564205, 0.2299575, 0.0022593169, 0.25940043, 0.010104028, -0.17387737, -0.12020657, -0.25978172, 0.069076784, -0.09353954, -0.057409134, -0.048858702, -0.115044445, -0.19313143, -0.15462089, 0.14306325, 0.13297117, 0.018673927, -0.05574753, -0.0076537174, -0.28154808, 0.31979483, 0.026106903, 0.04603346, -0.059027776, 0.041025206, -0.13605414, 0.23317498, 0.21174069, 0.078041025, -0.17607999, -0.051855393, -0.27796885, -0.35570046, 0.04882217, 0.13480361, 0.11094927, -0.07341316, -0.2513282, 0.017097535, -0.12939982, 0.1765655, 0.0043726726, -0.1646482, -0.09244843, -0.07229631, -0.036124315, 0.0713724, -0.20348924, -0.20200686, -0.099306956, -0.07727608, -0.06878283, 0.34491593, -0.04862456, 0.288199, -0.14932868, -0.011248313, -0.1619775, 0.12771314, -0.067322426, 0.050312262, 0.26488206, -0.45438048, -0.16701354, -0.024269667, -0.20508873, -0.13706926, 
-0.081109755, -0.032133017, 0.22538628, -0.35670912, 0.24337423, -0.11686166, 0.17959888, -0.07400869, -0.26138356, -0.14498967, 0.002314695, 0.2509966, -0.33920595, -0.24641253, -0.26611453, -0.093208805, -0.0814982, -0.25990567, 0.4346015, -0.12232823, -0.060464166, -0.0034285442, 0.44883457, 0.2001189, 0.1663645, 0.21496214, -0.038633876, 0.04551105, 0.11791142, -0.4636027, 0.2206974, -0.22927228, -0.118581764, 0.018132765, 0.09559669, -0.022403285, 0.03678469, -0.118074425, -0.084937155, 0.22283584, -0.37808853, -0.05847166, 0.27044755, 0.16123472, -0.26384753, 0.029716926, 0.12959477, 0.39335707, 0.088695444, -0.22262987, 0.11189321, -0.33450133, -0.040625323, -0.19362892, -0.29486585, 0.14436631, -0.07635042, 0.0817869, -0.09728282, -0.2813908, 0.21378614, -0.054669898, -0.09780386, 0.41780302, -0.0015549128, -0.1261193, 0.13937768, 0.0039213966, 0.021901237, -0.10023584, 0.2714901, 0.20122135, -0.27988607, 0.14680666, -0.13224062, -0.03425929, -0.09730296]}] |[{doc_similarity_rankings, 0, 91, Eighth document, the warmest place in France is the French Riviera coast in Southern France., {pieceId -> -1, lshId -> -1719102856, isWordStart -> true, token -> Eighth document, the warmest place in France is the French Riviera coast in Southern France., lshNeighbors -> [(-1548374770,0.2329717161223739),(-1320876223,0.2761524746260818),(1274183715,0.28519768414650126),(1293373212,0.2876650539432857),(-612640902,0.2991777399965483),(1634839239,0.3901714913624425),(1510101612,0.4043799951515284)], sentence -> 0}, [-0.016810948, -0.2047661, -0.2230267, -0.06613865, 0.13198656, 0.1876761, 0.26010045, -0.07860814, -0.08750986, -0.16776286, 0.2276038, -0.0048067835, -0.09549664, 0.09228111, -0.13566737, 0.49211678, 0.21506462, -0.46096098, 0.02290908, -0.030406667, -0.26267675, 0.06007962, 0.45803392, 0.32293695, 0.114533946, 0.08458777, -0.14395903, -0.04171004, 0.19107072, 0.24035561, 0.30237022, 0.06198846, 0.1061096, 0.23064552, -0.24602959, 0.06736374, -0.30546662, 
0.018916616, 0.23576747, -0.20862201, -0.07291601, 0.16750018, 0.21796244, -0.15457813, -0.10267717, 0.4108639, 0.22948943, 0.024940656, -0.12781665, -0.09829003, -0.36444336, 0.33999962, 0.2979673, 0.18795507, 0.0058128177, 0.03166188, -0.16602582, 0.23594972, -0.08020127, -0.10588683, -0.13476343, -0.20074773, -0.018953657, -0.049433485, 0.024178218, -0.15827763, 0.08096834, -0.16638803, -0.13942112, 0.066614725, -0.088811725, 0.13083075, 0.16235103, -0.28993893, -0.27632293, 0.050675448, -0.60434693, -0.11318788, 0.28318927, 0.4251388, -0.115691744, 0.20609955, 0.048247, 0.22572593, -0.009609341, -0.047968842, -0.063365586, -0.09246086, 0.17824914, 0.27709022, -0.19850928, -0.3818279, 0.076068364, 0.021581797, -0.10875837, -0.012900721, -0.02160152, -0.07872134, -0.18224761, -0.19908729, 0.06501623, -0.26073945, -0.13469686, 0.26163143, -0.016409602, -0.19890097, -0.027429793, 0.29457363, 0.06711283, -0.11308402, -0.16379794, 0.42119157, 0.33880904, 0.006702892, 0.011503242, 0.17496394, 0.11048721, -0.29479164, 0.41352564, -0.31115502, -0.014682422, -0.12465822, 0.122917116, 0.15077394, -0.20986927, 0.26472732, 0.12745324, 0.28114912, 0.17852971, 0.09809405, -0.021323938, 0.1425695, -0.116022065, 0.15450992, 0.21419188, 0.10874285, -0.0035544126, -0.33577085, -0.21062046, 0.2557292, 0.365727, 0.17186679, -0.034007914, 0.18571931, 0.113826446, 0.2041972, 0.16858879, -0.39305702, 0.047708813, 0.3407281, 0.10008117, 0.1666761, -0.08277356, -0.2655564, -0.2390036, -0.08099706, 0.045763668, -0.30731857, -0.11569927, 0.37365586, 0.01956875, -0.001237718, -0.14353643, -0.21356548, -0.03486019, -0.12086741, 0.041639853, 0.104439534, -0.092656165, -0.40179342, -0.10187488, -0.5471143, -0.12290574, 0.2087677, -0.30770865, 0.22984694, -0.29476187, 0.096242994, 0.384363, 0.026361842, 5.725033E-4, -0.19238421, -0.028222995, 0.113249354, 0.32910722, 0.23294336, -0.3986335, 0.09978031, 0.14896542, 0.23949988, 0.1478057, -0.032381695, -0.12643869, 0.115882315, -0.19580248, 
0.19138065, -0.21877433, 0.19352977, -0.2535542, -0.20547706, 0.27981648, -0.4005575, -0.046523586, 0.09361415, 0.2590049, 0.007964988, -0.05637875, -0.08670184, 0.099779375, 0.18480189, 0.14401811, -0.39154035, 0.2751374, -0.03074833, -0.024801074, -0.021942627, 0.18419088, 0.2608532, 0.11138497, -0.40339246, -0.13331044, 0.1154039, 0.2723197, -0.2262617, 0.16250908, -0.26903376, -0.37250632, -0.13596842, 0.21473151, 0.21515769, 0.16038711, -0.27683273, 0.17736936, -0.08310452, -0.42004582, -0.36562136, -0.09779574, 0.2411314, 0.20187439, 0.18733725, 0.2641376, 0.017855817, 0.11153809, 0.17190574, 0.16802579, -0.16192591, 0.18056063, -0.35846385, -0.049806055, -0.26109663, -0.19828144, -0.23734608, 0.3932265, -0.22376418, 0.22467436, 0.38326305, -0.31592938, -0.081319235, 0.1556078, 0.08884176, 0.06665615, -0.10425053, 0.20910178, 0.17956693, -0.10533009, 0.24591704, -0.0038596322, 0.2617894, 0.18107952, 0.09749653, 0.12563631, 0.12635578, -0.1391452, 0.041367147, 0.008686017, -0.029572926, -0.24497731, -0.14718057, 0.21611099, -0.03282076, 0.03800035, -0.16256967, -0.09790739, -0.0014242514, 0.4040815, -0.37026706, 0.26380262, 0.09079506, 0.14612387, -0.24610792, -0.1947632, 0.07052605, 0.18185152, -0.40230885, -0.007907403, 0.15706225, 0.111771695, 0.23515886, 0.25010493, -0.01306646, -0.10541734, 0.4784001, -0.14441222, -0.13855816, 0.27101827, -0.24725674, -0.2802681, 0.27687818, -0.03339839, 0.3054517, 0.130619, 0.037950855, 0.077942155, -0.5888695, 0.07351768, -0.46685404, -0.0040499587, 0.025250355, -0.0859045, -0.20816529, 0.14463536, 0.28113353, -0.25993484, -0.040858176, 0.21026222, 0.08361061, -0.10720821, 0.48898703, -2.2704061E-4, 0.2221854, -0.08727743, 0.2570495, -0.20949613, 0.25267476, -0.27079397, -0.09415934, 0.006743326, 0.09153167, 0.053167544, -0.03806283, -0.3219283, 0.22196239, 0.0017300758, -0.057169266, -0.05333144, 0.0975005, 0.005131098, 0.046497438, 0.0623432, 0.321922, 0.21922378, -0.03954325, -0.37501228, -0.0155652305, -0.09921332, 
0.06552464, 0.02618605, -0.014657838, 0.4420349, -0.08608749, 0.028822435, -0.132511, 0.26999778, 0.20299375, 0.1389036, 0.12914367, 0.07623987, 0.14375348, -0.052015696, -0.018790662, -0.13280135, -0.21133803, -0.27303988, 0.20791331, -0.22594361, -0.17630367, 0.18414178, 0.21877678, -0.12745881, 0.13150722, 0.32212988, 0.11044695, -0.16207896, 0.27268118, -0.099328026, 0.08867667, 0.31989032, -0.015583255, 0.16372082, 0.51543236, 0.20965122, -0.37761936, -0.029446285, -0.22624405, -0.0051141595, 0.26030782, -0.16369255, 0.1629463, 0.36997578, 0.31189638, 0.42942852, -0.001673679, -0.1205522, 0.08194525, 0.19638115, 0.011460368, -0.16485056, -0.16228262, 0.2740312, 0.05017268, -0.15451404, -0.03512774, -0.13452254, 0.028118514, -0.1329012, -0.4101204, 0.03879618, 0.21135166, -0.4898482, 0.091599375, -0.3092855, 0.06110656, -0.22477996, 0.21057707, -0.23781657, -0.10551279, 0.37984648, -0.09410357, 0.04732256, -0.18294896, -0.15246752, 0.021639392, 0.02237629, -0.017262453, -0.026302386, 0.3608514, -0.108894534, 0.0474644, 0.024507962, 0.1971899, -0.06268896, 0.19199464, 0.032802872, -0.13034374, -0.3862199, 0.15018144, -0.20086884, -0.4251439, -0.3633213, 0.35504803, -0.15731166, -0.25986132, -0.22435285, -0.26186633, 0.070692174, 0.16790512, 0.4691279, -0.39221457, -0.07939644, 0.4884994, -0.045310415, -0.18310241, 0.29507643, 0.2049564, -0.31412512, 0.3319548, 0.26859912, -0.046015155, 0.017437246, 0.50600624, 0.13420314, 0.16892372, -0.2181584, 0.43435982, -0.22135681, 0.32880262, -0.15483025, -0.22297119, -0.19834407, -0.013414336, 0.3219674, 0.1825749, -0.42209828, -0.10438974, 0.037324775, 0.361452, -0.37822384, -0.065436505, 0.0033893238, -0.33716473, 0.14011075, 0.061164405, 0.22412765, -0.37722385, -0.003241484, 0.39605972, -0.32297456, 0.121618845, 0.31544292, 0.098115414, 0.3545687, -0.040462196, -0.0015956911, 0.05998545, -0.2341839, -0.012709303, 0.12954898, 0.55812806, 0.15784661, -0.38522407, 0.10765506, 0.24407583, -0.16438684, 0.29567552, 
-0.087225564, -0.05044142, 0.26510367, -0.03295447, 0.1525916, -0.09594085, -0.23619364, -0.305938, 0.369586, -0.2030003, -0.10078699, -0.158635, -0.09669019, -0.13501357, 0.06365931, -0.3862268, 0.33531812, 0.13119636, -0.19266993, -0.08529151, -0.068182945, -0.14552347, -0.20925495, -0.26174715, 0.42169508, -0.15332408, -0.4422107, 0.26490703, 0.044914585, 0.3462565, 0.028742079, 0.112568825, -0.049105942, 0.14644153, 0.10022157, -0.1353474, 0.2859512, 0.05943501, -0.5361389, -0.14724284, -0.22212745, 0.09529675, 0.19780394, -0.3280398, 0.03847211, 0.047032848, 0.13924433, 0.04658549, -0.087256454, -0.06858529, 0.39810196, 0.2195755, 0.27559587, 0.080366105, 0.224659, -0.037330583, -0.32662335, 0.049084876, 0.08356128, -0.2064612, 0.44953158, -0.07670108, -0.3735964, -0.07003661, 0.40409234, 0.09436887, 0.002322631, -0.058938935, 0.21089995, 0.15257117, -0.11534224, 0.1868882, -0.027790006, -0.13924241, -0.11449168, 0.07226305, -0.2269321, 0.05748148, -0.15779555, -0.004239961, -0.2076957, 0.025448453, -0.20258994, 0.26121607, -0.3375632, 0.09614588, 0.0837002, 0.2889494, -0.35512874, -0.16446145, -0.07327044, 0.162409, 0.27919483, 0.33874825, 0.03656232, 0.020300211, -0.18202537, -0.24737185, 0.062216822, -0.20302315, 0.13892165, 0.07089765, 0.22756256, -0.33009684, -0.18666261, 0.20215912, -0.09404198, -0.14015506, 0.3908819, 0.23564205, 0.2299575, 0.0022593169, 0.25940043, 0.010104028, -0.17387737, -0.12020657, -0.25978172, 0.069076784, -0.09353954, -0.057409134, -0.048858702, -0.115044445, -0.19313143, -0.15462089, 0.14306325, 0.13297117, 0.018673927, -0.05574753, -0.0076537174, -0.28154808, 0.31979483, 0.026106903, 0.04603346, -0.059027776, 0.041025206, -0.13605414, 0.23317498, 0.21174069, 0.078041025, -0.17607999, -0.051855393, -0.27796885, -0.35570046, 0.04882217, 0.13480361, 0.11094927, -0.07341316, -0.2513282, 0.017097535, -0.12939982, 0.1765655, 0.0043726726, -0.1646482, -0.09244843, -0.07229631, -0.036124315, 0.0713724, -0.20348924, -0.20200686, 
-0.099306956, -0.07727608, -0.06878283, 0.34491593, -0.04862456, 0.288199, -0.14932868, -0.011248313, -0.1619775, 0.12771314, -0.067322426, 0.050312262, 0.26488206, -0.45438048, -0.16701354, -0.024269667, -0.20508873, -0.13706926, -0.081109755, -0.032133017, 0.22538628, -0.35670912, 0.24337423, -0.11686166, 0.17959888, -0.07400869, -0.26138356, -0.14498967, 0.002314695, 0.2509966, -0.33920595, -0.24641253, -0.26611453, -0.093208805, -0.0814982, -0.25990567, 0.4346015, -0.12232823, -0.060464166, -0.0034285442, 0.44883457, 0.2001189, 0.1663645, 0.21496214, -0.038633876, 0.04551105, 0.11791142, -0.4636027, 0.2206974, -0.22927228, -0.118581764, 0.018132765, 0.09559669, -0.022403285, 0.03678469, -0.118074425, -0.084937155, 0.22283584, -0.37808853, -0.05847166, 0.27044755, 0.16123472, -0.26384753, 0.029716926, 0.12959477, 0.39335707, 0.088695444, -0.22262987, 0.11189321, -0.33450133, -0.040625323, -0.19362892, -0.29486585, 0.14436631, -0.07635042, 0.0817869, -0.09728282, -0.2813908, 0.21378614, -0.054669898, -0.09780386, 0.41780302, -0.0015549128, -0.1261193, 0.13937768, 0.0039213966, 0.021901237, -0.10023584, 0.2714901, 0.20122135, -0.27988607, 0.14680666, -0.13224062, -0.03425929, -0.09730296]}] |-1719102856 |[(-1548374770,0.2329717161223739),(-1320876223,0.2761524746260818),(1274183715,0.28519768414650126),(1293373212,0.2876650539432857),(-612640902,0.2991777399965483),(1634839239,0.3901714913624425),(1510101612,0.4043799951515284)] |-1548374770 |0.2329717161223739 |\n", - 
"+------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+-------------------------+\n", - "\n" - ] - } - ], - "source": [ - "from sparknlp.annotator.similarity.document_similarity_ranker import *\n", - "\n", - "document_assembler = DocumentAssembler() \\\n", - " .setInputCol(\"text\") \\\n", - " .setOutputCol(\"document\")\n", - 
"sentence_detector = SentenceDetector() \\\n", - " .setInputCols([\"document\"]) \\\n", - " .setOutputCol(\"sentence\")\n", - "tokenizer = Tokenizer() \\\n", - " .setInputCols([\"sentence\"]) \\\n", - " .setOutputCol(\"token\")\n", - "\n", - "sentence_embeddings = RoBertaSentenceEmbeddings.pretrained() \\\n", - " .setInputCols([\"document\"]) \\\n", - " .setOutputCol(\"sentence_embeddings\")\n", - "\n", - "# TODO add document_similarity_ranker with input col embeddings too\n", - "document_similarity_ranker = DocumentSimilarityRankerApproach() \\\n", - " .setInputCols(\"sentence_embeddings\") \\\n", - " .setOutputCol(\"doc_similarity_rankings\") \\\n", - " .setSimilarityMethod(\"brp\") \\\n", - " .setNumberOfNeighbours(10) \\\n", - " .setBucketLength(2.0) \\\n", - " .setNumHashTables(3) \\\n", - " .setVisibleDistances(True) \\\n", - " .setIdentityRanking(False)\n", - "\n", - "document_similarity_ranker_finisher = DocumentSimilarityRankerFinisher() \\\n", - " .setInputCols(\"doc_similarity_rankings\") \\\n", - " .setOutputCols(\n", - " \"finished_doc_similarity_rankings_id\",\n", - " \"finished_doc_similarity_rankings_neighbors\") \\\n", - " .setExtractNearestNeighbor(True)\n", - "\n", - "pipeline = Pipeline(stages=[\n", - " document_assembler,\n", - " sentence_detector,\n", - " tokenizer,\n", - " sentence_embeddings,\n", - " document_similarity_ranker,\n", - " document_similarity_ranker_finisher\n", - " ])\n", - "\n", - "model = pipeline.fit(data)\n", - "# TODO add write/read pipeline\n", - "model.transform(data).show(10, False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2cde88af", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - 
"nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 62a294599443743c6587f0ef25ae8b05cb3e82f9 Mon Sep 17 00:00:00 2001 From: Stefano Lori Date: Sat, 1 Jul 2023 22:04:59 +0200 Subject: [PATCH 26/26] Bumped version 5.0.0 in doc sim ranker test --- .../text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb b/examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb index 121018b6eb2a9f..eb77b388e42dc7 100644 --- a/examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb +++ b/examples/python/annotation/text/english/text-similarity/doc-sim-ranker/test_doc_sim_ranker.ipynb @@ -231,7 +231,7 @@ " .config(\"spark.driver.memory\",\"16G\")\\\n", " .config(\"spark.driver.maxResultSize\", \"0\") \\\n", " .config(\"spark.kryoserializer.buffer.max\", \"2000M\")\\\n", - " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.4\")\\\n", + " .config(\"spark.jars.packages\", \"com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.0\")\\\n", " .getOrCreate()" ] },