diff --git a/docs/_posts/ahmedlone127/2023-09-12-20split_dataset_en.md b/docs/_posts/ahmedlone127/2023-09-12-20split_dataset_en.md new file mode 100644 index 00000000000000..780390afc444b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-20split_dataset_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 20split_dataset BertEmbeddings from Billwzl +author: John Snow Labs +name: 20split_dataset +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `20split_dataset` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20split_dataset_en_5.1.1_3.0_1694558868282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20split_dataset_en_5.1.1_3.0_1694558868282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("20split_dataset","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("20split_dataset", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20split_dataset| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Billwzl/20split_dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abena_base_akuapem_twi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-abena_base_akuapem_twi_cased_en.md new file mode 100644 index 00000000000000..3b52b9a141e7c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abena_base_akuapem_twi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abena_base_akuapem_twi_cased BertEmbeddings from Ghana-NLP +author: John Snow Labs +name: abena_base_akuapem_twi_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abena_base_akuapem_twi_cased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abena_base_akuapem_twi_cased_en_5.1.1_3.0_1694558470329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abena_base_akuapem_twi_cased_en_5.1.1_3.0_1694558470329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("abena_base_akuapem_twi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("abena_base_akuapem_twi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abena_base_akuapem_twi_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/Ghana-NLP/abena-base-akuapem-twi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abena_base_asante_twi_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-abena_base_asante_twi_uncased_en.md new file mode 100644 index 00000000000000..f196600033a6cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abena_base_asante_twi_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abena_base_asante_twi_uncased BertEmbeddings from Ghana-NLP +author: John Snow Labs +name: abena_base_asante_twi_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abena_base_asante_twi_uncased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abena_base_asante_twi_uncased_en_5.1.1_3.0_1694558682719.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abena_base_asante_twi_uncased_en_5.1.1_3.0_1694558682719.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("abena_base_asante_twi_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("abena_base_asante_twi_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abena_base_asante_twi_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/Ghana-NLP/abena-base-asante-twi-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_query_pubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_query_pubmed_en.md new file mode 100644 index 00000000000000..934d2783451faf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_query_pubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abstract_sim_query_pubmed BertEmbeddings from biu-nlp +author: John Snow Labs +name: abstract_sim_query_pubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abstract_sim_query_pubmed` is an English model originally trained by biu-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abstract_sim_query_pubmed_en_5.1.1_3.0_1694561530585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abstract_sim_query_pubmed_en_5.1.1_3.0_1694561530585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("abstract_sim_query_pubmed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("abstract_sim_query_pubmed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abstract_sim_query_pubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.2 MB| + +## References + +https://huggingface.co/biu-nlp/abstract-sim-query-pubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_sentence_pubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_sentence_pubmed_en.md new file mode 100644 index 00000000000000..67b65af41b7b7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_sentence_pubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abstract_sim_sentence_pubmed BertEmbeddings from biu-nlp +author: John Snow Labs +name: abstract_sim_sentence_pubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abstract_sim_sentence_pubmed` is an English model originally trained by biu-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abstract_sim_sentence_pubmed_en_5.1.1_3.0_1694561658345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abstract_sim_sentence_pubmed_en_5.1.1_3.0_1694561658345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("abstract_sim_sentence_pubmed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("abstract_sim_sentence_pubmed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abstract_sim_sentence_pubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.3 MB| + +## References + +https://huggingface.co/biu-nlp/abstract-sim-sentence-pubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ai12_mackei_en.md b/docs/_posts/ahmedlone127/2023-09-12-ai12_mackei_en.md new file mode 100644 index 00000000000000..dd2be1f5865cf6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ai12_mackei_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ai12_mackei BertEmbeddings from mackei +author: John Snow Labs +name: ai12_mackei +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ai12_mackei` is an English model originally trained by mackei. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai12_mackei_en_5.1.1_3.0_1694551191898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai12_mackei_en_5.1.1_3.0_1694551191898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ai12_mackei","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ai12_mackei", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai12_mackei| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/mackei/ai12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_en.md new file mode 100644 index 00000000000000..787d76eff64468 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akeylegalbert BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: akeylegalbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `akeylegalbert` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akeylegalbert_en_5.1.1_3.0_1694557063708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akeylegalbert_en_5.1.1_3.0_1694557063708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("akeylegalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("akeylegalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akeylegalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AkeyLegalBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_14epoch_en.md b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_14epoch_en.md new file mode 100644 index 00000000000000..6887e299659818 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_14epoch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akeylegalbert_inscotus_and_ledgar_14epoch BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: akeylegalbert_inscotus_and_ledgar_14epoch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `akeylegalbert_inscotus_and_ledgar_14epoch` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_14epoch_en_5.1.1_3.0_1694554005394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_14epoch_en_5.1.1_3.0_1694554005394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("akeylegalbert_inscotus_and_ledgar_14epoch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("akeylegalbert_inscotus_and_ledgar_14epoch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akeylegalbert_inscotus_and_ledgar_14epoch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AkeyLegalBert_inScotus_and_Ledgar_14epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_en.md b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_en.md new file mode 100644 index 00000000000000..9310c0c7819043 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akeylegalbert_inscotus_and_ledgar BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: akeylegalbert_inscotus_and_ledgar +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `akeylegalbert_inscotus_and_ledgar` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_en_5.1.1_3.0_1694553675259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_en_5.1.1_3.0_1694553675259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("akeylegalbert_inscotus_and_ledgar","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("akeylegalbert_inscotus_and_ledgar", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akeylegalbert_inscotus_and_ledgar| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AkeyLegalBert_inScotus_and_Ledgar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-algarlegalbert1_large_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-12-algarlegalbert1_large_arabertv2_en.md new file mode 100644 index 00000000000000..d1a780f6d78143 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-algarlegalbert1_large_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegalbert1_large_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegalbert1_large_arabertv2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegalbert1_large_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegalbert1_large_arabertv2_en_5.1.1_3.0_1694555945058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegalbert1_large_arabertv2_en_5.1.1_3.0_1694555945058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegalbert1_large_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegalbert1_large_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegalbert1_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegalBert1-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabert256_flickr8k_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabert256_flickr8k_en.md new file mode 100644 index 00000000000000..e38d9b909ca658 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabert256_flickr8k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabert256_flickr8k BertEmbeddings from jontooy +author: John Snow Labs +name: arabert256_flickr8k +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert256_flickr8k` is an English model originally trained by jontooy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert256_flickr8k_en_5.1.1_3.0_1694510056959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert256_flickr8k_en_5.1.1_3.0_1694510056959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabert256_flickr8k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabert256_flickr8k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert256_flickr8k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/jontooy/AraBERT256-Flickr8k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabert32_coco_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabert32_coco_en.md new file mode 100644 index 00000000000000..c953572c2f84fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabert32_coco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabert32_coco BertEmbeddings from jontooy +author: John Snow Labs +name: arabert32_coco +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert32_coco` is an English model originally trained by jontooy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert32_coco_en_5.1.1_3.0_1694509854276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert32_coco_en_5.1.1_3.0_1694509854276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabert32_coco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabert32_coco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert32_coco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/jontooy/AraBERT32-COCO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabert_c19_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabert_c19_ar.md new file mode 100644 index 00000000000000..3cff9a535af3ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabert_c19_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabert_c19 BertEmbeddings from moha +author: John Snow Labs +name: arabert_c19 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert_c19` is an Arabic model originally trained by moha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert_c19_ar_5.1.1_3.0_1694554366327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert_c19_ar_5.1.1_3.0_1694554366327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabert_c19","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabert_c19", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert_c19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/moha/arabert_c19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v1_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v1_ar.md new file mode 100644 index 00000000000000..adf89169ecb019 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v1_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v1 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v1 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v1` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v1_ar_5.1.1_3.0_1694509594398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v1_ar_5.1.1_3.0_1694509594398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v1","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v1", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.8 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v2_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v2_ar.md new file mode 100644 index 00000000000000..3d49800c8ab1f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v2_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v2 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v2 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v2` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v2_ar_5.1.1_3.0_1694509720231.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v2_ar_5.1.1_3.0_1694509720231.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v2","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v2", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.8 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v3_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v3_ar.md new file mode 100644 index 00000000000000..50e019c20dcb7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v3_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v3 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v3 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v3` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v3_ar_5.1.1_3.0_1694509862023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v3_ar_5.1.1_3.0_1694509862023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v3","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v3", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.8 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v4_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v4_ar.md new file mode 100644 index 00000000000000..2fa8ff3e650214 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v4_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v4 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v4 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v4` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v4_ar_5.1.1_3.0_1694510028864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v4_ar_5.1.1_3.0_1694510028864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v4","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v4", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v5_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v5_ar.md new file mode 100644 index 00000000000000..4525a6a58b56fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v5_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v5 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v5 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v5` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v5_ar_5.1.1_3.0_1694510170817.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v5_ar_5.1.1_3.0_1694510170817.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v5","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v5", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v6_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v6_ar.md new file mode 100644 index 00000000000000..737813848f622d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v6_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v6 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v6 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v6` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v6_ar_5.1.1_3.0_1694510331239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v6_ar_5.1.1_3.0_1694510331239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v6","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v6", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v7_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v7_en.md new file mode 100644 index 00000000000000..76058d595f77cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabertmo_base_v7 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v7 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v7` is an English model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v7_en_5.1.1_3.0_1694510465368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v7_en_5.1.1_3.0_1694510465368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v8_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v8_en.md new file mode 100644 index 00000000000000..71772712dcccf2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v8_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabertmo_base_v8 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v8 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v8` is an English model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v8_en_5.1.1_3.0_1694510601521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v8_en_5.1.1_3.0_1694510601521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v8","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v8", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v8| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v9_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v9_en.md new file mode 100644 index 00000000000000..56b5aea49b96a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabertmo_base_v9 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v9 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v9` is an English model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v9_en_5.1.1_3.0_1694510724445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v9_en_5.1.1_3.0_1694510724445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabertmo_base_v9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabic_quran_nahj_sahife_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabic_quran_nahj_sahife_ar.md new file mode 100644 index 00000000000000..f0a31f7ef38fa0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabic_quran_nahj_sahife_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabic_quran_nahj_sahife BertEmbeddings from pourmand1376 +author: John Snow Labs +name: arabic_quran_nahj_sahife +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabic_quran_nahj_sahife` is an Arabic model originally trained by pourmand1376. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabic_quran_nahj_sahife_ar_5.1.1_3.0_1694547794347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabic_quran_nahj_sahife_ar_5.1.1_3.0_1694547794347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arabic_quran_nahj_sahife","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabic_quran_nahj_sahife", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabic_quran_nahj_sahife| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|412.0 MB| + +## References + +https://huggingface.co/pourmand1376/arabic-quran-nahj-sahife \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-aristoberto_en.md b/docs/_posts/ahmedlone127/2023-09-12-aristoberto_en.md new file mode 100644 index 00000000000000..10d30d7d6fd037 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-aristoberto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English aristoberto BertEmbeddings from Jacobo +author: John Snow Labs +name: aristoberto +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `aristoberto` is an English model originally trained by Jacobo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aristoberto_en_5.1.1_3.0_1694561096224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aristoberto_en_5.1.1_3.0_1694561096224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("aristoberto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("aristoberto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aristoberto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.1 MB| + +## References + +https://huggingface.co/Jacobo/aristoBERTo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-astrobert_en.md b/docs/_posts/ahmedlone127/2023-09-12-astrobert_en.md new file mode 100644 index 00000000000000..ce4bf012e03b87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-astrobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English astrobert BertEmbeddings from adsabs +author: John Snow Labs +name: astrobert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `astrobert` is an English model originally trained by adsabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/astrobert_en_5.1.1_3.0_1694554976289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/astrobert_en_5.1.1_3.0_1694554976289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("astrobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("astrobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|astrobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.6 MB| + +## References + +https://huggingface.co/adsabs/astroBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-baby_model_en.md b/docs/_posts/ahmedlone127/2023-09-12-baby_model_en.md new file mode 100644 index 00000000000000..10d7b3bacb6524 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-baby_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English baby_model BertEmbeddings from patNike +author: John Snow Labs +name: baby_model +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `baby_model` is an English model originally trained by patNike. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/baby_model_en_5.1.1_3.0_1694562898992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/baby_model_en_5.1.1_3.0_1694562898992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("baby_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("baby_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|baby_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/patNike/baby_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-berel_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-berel_base_en.md new file mode 100644 index 00000000000000..8560c91596e653 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-berel_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English berel_base BertEmbeddings from t4-project +author: John Snow Labs +name: berel_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berel_base` is an English model originally trained by t4-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_base_en_5.1.1_3.0_1694559343111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_base_en_5.1.1_3.0_1694559343111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("berel_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("berel_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|690.1 MB| + +## References + +https://huggingface.co/t4-project/BEREL-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1850_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1850_en.md new file mode 100644 index 00000000000000..520ec650e3ca92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1850_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1760_1850 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1760_1850 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1760_1850` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1760_1850_en_5.1.1_3.0_1694561341595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1760_1850_en_5.1.1_3.0_1694561341595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1760_1850","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1760_1850", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1760_1850| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1760_1850 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1900_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1900_en.md new file mode 100644 index 00000000000000..452403dad636d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1900_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1760_1900 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1760_1900 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1760_1900` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1760_1900_en_5.1.1_3.0_1694561483109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1760_1900_en_5.1.1_3.0_1694561483109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1760_1900","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1760_1900", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1760_1900| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1760_1900 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1850_1875_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1850_1875_en.md new file mode 100644 index 00000000000000..82f7734b491242 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1850_1875_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1850_1875 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1850_1875 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1850_1875` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1850_1875_en_5.1.1_3.0_1694561623490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1850_1875_en_5.1.1_3.0_1694561623490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1850_1875","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1850_1875", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1850_1875| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1850_1875 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1875_1890_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1875_1890_en.md new file mode 100644 index 00000000000000..f4f5507f9b6045 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1875_1890_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1875_1890 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1875_1890 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1875_1890` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1875_1890_en_5.1.1_3.0_1694561751322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1875_1890_en_5.1.1_3.0_1694561751322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1875_1890","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1875_1890", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1875_1890| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1875_1890 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1890_1900_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1890_1900_en.md new file mode 100644 index 00000000000000..9aab57cf704483 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1890_1900_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1890_1900 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1890_1900 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1890_1900` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1890_1900_en_5.1.1_3.0_1694561889659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1890_1900_en_5.1.1_3.0_1694561889659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1890_1900","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1890_1900", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1890_1900| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1890_1900 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_ancient_chinese_zh.md b/docs/_posts/ahmedlone127/2023-09-12-bert_ancient_chinese_zh.md new file mode 100644 index 00000000000000..3bfe0f08bfacef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_ancient_chinese_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese bert_ancient_chinese BertEmbeddings from Jihuai +author: John Snow Labs +name: bert_ancient_chinese +date: 2023-09-12 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ancient_chinese` is a Chinese model originally trained by Jihuai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ancient_chinese_zh_5.1.1_3.0_1694547641835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ancient_chinese_zh_5.1.1_3.0_1694547641835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_ancient_chinese","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_ancient_chinese", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ancient_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|430.5 MB| + +## References + +https://huggingface.co/Jihuai/bert-ancient-chinese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_10lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_10lang_cased_xx.md new file mode 100644 index 00000000000000..630eef5ec4eeb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_10lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_10lang_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_10lang_cased +date: 2023-09-12 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_10lang_cased` is a Multilingual model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_10lang_cased_xx_5.1.1_3.0_1694549523236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_10lang_cased_xx_5.1.1_3.0_1694549523236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_10lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_10lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_10lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|514.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-10lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_15lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_15lang_cased_xx.md new file mode 100644 index 00000000000000..63be7c86770505 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_15lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_15lang_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_15lang_cased +date: 2023-09-12 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_15lang_cased` is a Multilingual model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_15lang_cased_xx_5.1.1_3.0_1694549707123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_15lang_cased_xx_5.1.1_3.0_1694549707123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_15lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_15lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_15lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|526.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-15lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_25lang_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_25lang_cased_en.md new file mode 100644 index 00000000000000..c84718c61a1780 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_25lang_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_25lang_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_25lang_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_25lang_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_25lang_cased_en_5.1.1_3.0_1694549898376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_25lang_cased_en_5.1.1_3.0_1694549898376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_25lang_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_25lang_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_25lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|565.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-25lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_divehi_v2_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_divehi_v2_en.md new file mode 100644 index 00000000000000..5f0da46f695097 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_divehi_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_divehi_v2 BertEmbeddings from mahfooz +author: John Snow Labs +name: bert_base_cased_divehi_v2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_divehi_v2` is an English model originally trained by mahfooz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_divehi_v2_en_5.1.1_3.0_1694549387188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_divehi_v2_en_5.1.1_3.0_1694549387188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_divehi_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_divehi_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_divehi_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/mahfooz/bert-base-cased-dv-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_finetuned_en.md new file mode 100644 index 00000000000000..612e93eb431eda --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned BertEmbeddings from GusNicho +author: John Snow Labs +name: bert_base_cased_finetuned +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned` is an English model originally trained by GusNicho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_en_5.1.1_3.0_1694559327612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_en_5.1.1_3.0_1694559327612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/GusNicho/bert-base-cased-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_test_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_test_en.md new file mode 100644 index 00000000000000..6257d2040102f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_test BertEmbeddings from chenyu313 +author: John Snow Labs +name: bert_base_cased_test +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_test` is an English model originally trained by chenyu313. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_test_en_5.1.1_3.0_1694554576959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_test_en_5.1.1_3.0_1694554576959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/chenyu313/bert-base-cased-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_germanic_languages_nl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_germanic_languages_nl.md new file mode 100644 index 00000000000000..85066ea191d1c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_germanic_languages_nl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dutch, Flemish bert_base_dutch_cased_finetuned_germanic_languages BertEmbeddings from GeniusVoice +author: John Snow Labs +name: bert_base_dutch_cased_finetuned_germanic_languages +date: 2023-09-12 +tags: [bert, nl, open_source, fill_mask, onnx] +task: Embeddings +language: nl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_finetuned_germanic_languages` is a Dutch, Flemish model originally trained by GeniusVoice.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_germanic_languages_nl_5.1.1_3.0_1694549349357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_germanic_languages_nl_5.1.1_3.0_1694549349357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased_finetuned_germanic_languages","nl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_finetuned_germanic_languages", "nl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_finetuned_germanic_languages| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|nl| +|Size:|406.8 MB| + +## References + +https://huggingface.co/GeniusVoice/bert-base-dutch-cased-finetuned-gem \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_gronlp_nl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_gronlp_nl.md new file mode 100644 index 00000000000000..b8755890f4c0e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_gronlp_nl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dutch, Flemish bert_base_dutch_cased_gronlp BertEmbeddings from GroNLP +author: John Snow Labs +name: bert_base_dutch_cased_gronlp +date: 2023-09-12 +tags: [bert, nl, open_source, fill_mask, onnx] +task: Embeddings +language: nl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_gronlp` is a Dutch, Flemish model originally trained by GroNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_gronlp_nl_5.1.1_3.0_1694559021052.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_gronlp_nl_5.1.1_3.0_1694559021052.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased_gronlp","nl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_gronlp", "nl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_gronlp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|nl| +|Size:|406.8 MB| + +## References + +https://huggingface.co/GroNLP/bert-base-dutch-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_arabic_cased_en.md new file mode 100644 index 00000000000000..0da02178e7a8f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_arabic_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_arabic_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_arabic_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_arabic_cased_en_5.1.1_3.0_1694550500308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_arabic_cased_en_5.1.1_3.0_1694550500308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_arabic_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_bulgarian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_bulgarian_cased_en.md new file mode 100644 index 00000000000000..498ecc8acdce6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_bulgarian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_bulgarian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_bulgarian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_bulgarian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_bulgarian_cased_en_5.1.1_3.0_1694550674451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_bulgarian_cased_en_5.1.1_3.0_1694550674451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_bulgarian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_bulgarian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_bulgarian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.0 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-bg-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_cased_en.md new file mode 100644 index 00000000000000..109b67366824be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_chinese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_cased_en_5.1.1_3.0_1694556350417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_cased_en_5.1.1_3.0_1694556350417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.8 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_hindi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_hindi_cased_en.md new file mode 100644 index 00000000000000..5d9a398859b97b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_hindi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_chinese_hindi_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_chinese_hindi_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_chinese_hindi_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_hindi_cased_en_5.1.1_3.0_1694556485723.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_hindi_cased_en_5.1.1_3.0_1694556485723.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_chinese_hindi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_chinese_hindi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_chinese_hindi_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|426.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-zh-hi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_danish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_danish_cased_en.md new file mode 100644 index 00000000000000..73ec5ce0f00e26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_danish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_danish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_danish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_danish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_danish_cased_en_5.1.1_3.0_1694550943639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_danish_cased_en_5.1.1_3.0_1694550943639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_danish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_danish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_danish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|414.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-da-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_arabic_cased_en.md new file mode 100644 index 00000000000000..bc901d17436626 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_arabic_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_arabic_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_arabic_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_arabic_cased_en_5.1.1_3.0_1694551872020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_arabic_cased_en_5.1.1_3.0_1694551872020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_arabic_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|426.0 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_cased_en.md new file mode 100644 index 00000000000000..505fc85204dfd4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_cased_en_5.1.1_3.0_1694552022056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_cased_en_5.1.1_3.0_1694552022056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|416.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_cased_en.md new file mode 100644 index 00000000000000..90a546f334b71d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_chinese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_cased_en_5.1.1_3.0_1694553556344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_cased_en_5.1.1_3.0_1694553556344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|435.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_japanese_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_japanese_vietnamese_cased_en.md new file mode 100644 index 00000000000000..b30cb7c2282886 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_japanese_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_chinese_japanese_vietnamese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_chinese_japanese_vietnamese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_chinese_japanese_vietnamese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_japanese_vietnamese_cased_en_5.1.1_3.0_1694553703735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_japanese_vietnamese_cased_en_5.1.1_3.0_1694553703735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_chinese_japanese_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_chinese_japanese_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_chinese_japanese_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|446.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-zh-ja-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_danish_japanese_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_danish_japanese_vietnamese_cased_en.md new file mode 100644 index 00000000000000..c9f7fefe5c89ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_danish_japanese_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_danish_japanese_vietnamese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_danish_japanese_vietnamese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_danish_japanese_vietnamese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_danish_japanese_vietnamese_cased_en_5.1.1_3.0_1694552158682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_danish_japanese_vietnamese_cased_en_5.1.1_3.0_1694552158682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_danish_japanese_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_danish_japanese_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_danish_japanese_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|446.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-da-ja-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_dutch_russian_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_dutch_russian_arabic_cased_en.md new file mode 100644 index 00000000000000..d3d6b817004f00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_dutch_russian_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_dutch_russian_arabic_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_dutch_russian_arabic_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_dutch_russian_arabic_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_dutch_russian_arabic_cased_en_5.1.1_3.0_1694553406628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_dutch_russian_arabic_cased_en_5.1.1_3.0_1694553406628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_dutch_russian_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_dutch_russian_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_dutch_russian_arabic_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|461.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-nl-ru-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_cased_en.md new file mode 100644 index 00000000000000..9b87ae583b200f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_german_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_german_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_german_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_cased_en_5.1.1_3.0_1694552319403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_cased_en_5.1.1_3.0_1694552319403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_german_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_german_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_german_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|432.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-de-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_norwegian_danish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_norwegian_danish_cased_en.md new file mode 100644 index 00000000000000..4673eebea8eaab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_norwegian_danish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_german_norwegian_danish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_german_norwegian_danish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_german_norwegian_danish_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_norwegian_danish_cased_en_5.1.1_3.0_1694552478365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_norwegian_danish_cased_en_5.1.1_3.0_1694552478365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_german_norwegian_danish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_german_norwegian_danish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_german_norwegian_danish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|440.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-de-no-da-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_italian_cased_en.md new file mode 100644 index 00000000000000..f2fc131d2f6b9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_italian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_italian_cased_en_5.1.1_3.0_1694553098457.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_italian_cased_en_5.1.1_3.0_1694553098457.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|428.1 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_lithuanian_norwegian_polish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_lithuanian_norwegian_polish_cased_en.md new file mode 100644 index 00000000000000..b95580bffaeb7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_lithuanian_norwegian_polish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_lithuanian_norwegian_polish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_lithuanian_norwegian_polish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_lithuanian_norwegian_polish_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_lithuanian_norwegian_polish_cased_en_5.1.1_3.0_1694553253099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_lithuanian_norwegian_polish_cased_en_5.1.1_3.0_1694553253099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_lithuanian_norwegian_polish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_lithuanian_norwegian_polish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_lithuanian_norwegian_polish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-lt-no-pl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_cased_en.md new file mode 100644 index 00000000000000..09ad6305d4fe16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_spanish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_spanish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_spanish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_cased_en_5.1.1_3.0_1694552634510.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_cased_en_5.1.1_3.0_1694552634510.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_spanish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_spanish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_spanish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|433.1 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-es-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_german_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_german_chinese_cased_en.md new file mode 100644 index 00000000000000..164b1bfa67175d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_german_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_spanish_german_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_spanish_german_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_spanish_german_chinese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_german_chinese_cased_en_5.1.1_3.0_1694552792508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_german_chinese_cased_en_5.1.1_3.0_1694552792508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_spanish_german_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_spanish_german_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_spanish_german_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|466.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-es-de-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_portuguese_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_portuguese_italian_cased_en.md new file mode 100644 index 00000000000000..4de6c38b51eb29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_portuguese_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_spanish_portuguese_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_spanish_portuguese_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_spanish_portuguese_italian_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_portuguese_italian_cased_en_5.1.1_3.0_1694552951990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_portuguese_italian_cased_en_5.1.1_3.0_1694552951990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_english_french_spanish_portuguese_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_spanish_portuguese_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_spanish_portuguese_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|444.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-es-pt-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_german_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_german_cased_en.md new file mode 100644 index 00000000000000..ab954c6eec1a23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_german_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_german_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_german_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_german_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_german_cased_en_5.1.1_3.0_1694551087783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_german_cased_en_5.1.1_3.0_1694551087783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_german_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_german_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_german_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.8 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-de-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_hindi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_hindi_cased_en.md new file mode 100644 index 00000000000000..dae53a7b0be28f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_hindi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_hindi_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_hindi_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_hindi_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_hindi_cased_en_5.1.1_3.0_1694553855877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_hindi_cased_en_5.1.1_3.0_1694553855877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_hindi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_hindi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_hindi_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-hi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_italian_cased_en.md new file mode 100644 index 00000000000000..c2e9d25b892aaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_italian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_italian_cased_en_5.1.1_3.0_1694553994811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_italian_cased_en_5.1.1_3.0_1694553994811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|417.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_japanese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_japanese_cased_en.md new file mode 100644 index 00000000000000..0f0c199d37599b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_japanese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_japanese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_japanese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_japanese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_japanese_cased_en_5.1.1_3.0_1694554115375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_japanese_cased_en_5.1.1_3.0_1694554115375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_japanese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_japanese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_japanese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|416.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ja-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_lithuanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_lithuanian_cased_en.md new file mode 100644 index 00000000000000..6d3dc265adafc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_lithuanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_lithuanian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_lithuanian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_lithuanian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_lithuanian_cased_en_5.1.1_3.0_1694554240405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_lithuanian_cased_en_5.1.1_3.0_1694554240405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_lithuanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_lithuanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_lithuanian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-lt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_norwegian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_norwegian_cased_en.md new file mode 100644 index 00000000000000..73cb0747dbf2bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_norwegian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_norwegian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_norwegian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_norwegian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_norwegian_cased_en_5.1.1_3.0_1694554556744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_norwegian_cased_en_5.1.1_3.0_1694554556744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_norwegian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_norwegian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_norwegian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|415.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-no-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_polish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_polish_cased_en.md new file mode 100644 index 00000000000000..372a14b4d7dd63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_polish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_polish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_polish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_polish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_polish_cased_en_5.1.1_3.0_1694554731414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_polish_cased_en_5.1.1_3.0_1694554731414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_polish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_polish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_polish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|417.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-pl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_portuguese_cased_en.md new file mode 100644 index 00000000000000..b378b9fc4d462f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_portuguese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_portuguese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_portuguese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_portuguese_cased_en_5.1.1_3.0_1694554858787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_portuguese_cased_en_5.1.1_3.0_1694554858787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|419.2 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-pt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_romanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_romanian_cased_en.md new file mode 100644 index 00000000000000..cc7304f533b9b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_romanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_romanian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_romanian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_romanian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_romanian_cased_en_5.1.1_3.0_1694555022603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_romanian_cased_en_5.1.1_3.0_1694555022603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_romanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_romanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_romanian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|413.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ro-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_cased_en.md new file mode 100644 index 00000000000000..1053c12c4e75ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_cased_en_5.1.1_3.0_1694551237571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_cased_en_5.1.1_3.0_1694551237571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|422.2 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_chinese_cased_en.md new file mode 100644 index 00000000000000..a45780d7531714 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_chinese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_chinese_cased_en_5.1.1_3.0_1694551698031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_chinese_cased_en_5.1.1_3.0_1694551698031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_italian_cased_en.md new file mode 100644 index 00000000000000..b6b4c68f82395d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_italian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_italian_cased_en_5.1.1_3.0_1694551412882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_italian_cased_en_5.1.1_3.0_1694551412882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|431.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_portuguese_cased_en.md new file mode 100644 index 00000000000000..bd6389c9cac801 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_portuguese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_portuguese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_portuguese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_portuguese_cased_en_5.1.1_3.0_1694551563082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_portuguese_cased_en_5.1.1_3.0_1694551563082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|428.0 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-pt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_swahili_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_swahili_cased_en.md new file mode 100644 index 00000000000000..e87b3d8cf83294 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_swahili_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_swahili_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_swahili_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_swahili_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_swahili_cased_en_5.1.1_3.0_1694555358059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_swahili_cased_en_5.1.1_3.0_1694555358059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_swahili_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_swahili_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_swahili_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-sw-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_thai_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_thai_cased_en.md new file mode 100644 index 00000000000000..f6aa44acd672a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_thai_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_thai_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_thai_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_thai_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_thai_cased_en_5.1.1_3.0_1694555543000.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_thai_cased_en_5.1.1_3.0_1694555543000.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_thai_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_thai_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_thai_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|404.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-th-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_ukrainian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_ukrainian_cased_en.md new file mode 100644 index 00000000000000..95024470e16b7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_ukrainian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_ukrainian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_ukrainian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_ukrainian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_ukrainian_cased_en_5.1.1_3.0_1694555899898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_ukrainian_cased_en_5.1.1_3.0_1694555899898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_ukrainian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_ukrainian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_ukrainian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|422.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-uk-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_urdu_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_urdu_cased_en.md new file mode 100644 index 00000000000000..a7f82d7e7ca9a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_urdu_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_urdu_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_urdu_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_urdu_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_urdu_cased_en_5.1.1_3.0_1694556060152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_urdu_cased_en_5.1.1_3.0_1694556060152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_urdu_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_urdu_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_urdu_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ur-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_vietnamese_cased_en.md new file mode 100644 index 00000000000000..d28523b7e7bd4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_vietnamese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_vietnamese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_vietnamese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_vietnamese_cased_en_5.1.1_3.0_1694556202106.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_vietnamese_cased_en_5.1.1_3.0_1694556202106.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.8 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_galician_cased_gl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_galician_cased_gl.md new file mode 100644 index 00000000000000..3683260ef29e77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_galician_cased_gl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Galician bert_base_galician_cased BertEmbeddings from marcosgg +author: John Snow Labs +name: bert_base_galician_cased +date: 2023-09-12 +tags: [bert, gl, open_source, fill_mask, onnx] +task: Embeddings +language: gl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_galician_cased` is a Galician model originally trained by marcosgg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_galician_cased_gl_5.1.1_3.0_1694551516731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_galician_cased_gl_5.1.1_3.0_1694551516731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_galician_cased","gl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_galician_cased", "gl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_galician_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gl| +|Size:|664.5 MB| + +## References + +https://huggingface.co/marcosgg/bert-base-gl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_german_dbmdz_uncased_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_german_dbmdz_uncased_german_en.md new file mode 100644 index 00000000000000..3e20cf82c3cde5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_german_dbmdz_uncased_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_german_dbmdz_uncased_german BertEmbeddings from koala +author: John Snow Labs +name: bert_base_german_dbmdz_uncased_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_german_dbmdz_uncased_german` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_uncased_german_en_5.1.1_3.0_1694508300297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_uncased_german_en_5.1.1_3.0_1694508300297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_german_dbmdz_uncased_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_dbmdz_uncased_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_dbmdz_uncased_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/koala/bert-base-german-dbmdz-uncased-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_greek_uncased_v1_el.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_greek_uncased_v1_el.md new file mode 100644 index 00000000000000..2a200fdac65287 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_greek_uncased_v1_el.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Modern Greek (1453-) bert_base_greek_uncased_v1 BertEmbeddings from nlpaueb +author: John Snow Labs +name: bert_base_greek_uncased_v1 +date: 2023-09-12 +tags: [bert, el, open_source, fill_mask, onnx] +task: Embeddings +language: el +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_greek_uncased_v1` is a Modern Greek (1453-) model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_el_5.1.1_3.0_1694561091912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_el_5.1.1_3.0_1694561091912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v1","el") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v1", "el") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|421.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-greek-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_ko.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_ko.md new file mode 100644 index 00000000000000..97429141244fd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean bert_base BertEmbeddings from klue +author: John Snow Labs +name: bert_base +date: 2023-09-12 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base` is a Korean model originally trained by klue. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_ko_5.1.1_3.0_1694508175577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_ko_5.1.1_3.0_1694508175577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|412.4 MB| + +## References + +https://huggingface.co/klue/bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_en.md new file mode 100644 index 00000000000000..79a8300797a022 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased BertEmbeddings from HooshvareLab +author: John Snow Labs +name: bert_base_parsbert_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased` is an English model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_en_5.1.1_3.0_1694559679646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_en_5.1.1_3.0_1694559679646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.4 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-base-parsbert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en.md new file mode 100644 index 00000000000000..e117fc540fd68e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased_finetuned_conditioned_khorshid BertEmbeddings from Hamid-reza +author: John Snow Labs +name: bert_base_parsbert_uncased_finetuned_conditioned_khorshid +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased_finetuned_conditioned_khorshid` is an English model originally trained by Hamid-reza.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en_5.1.1_3.0_1694552930772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en_5.1.1_3.0_1694552930772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased_finetuned_conditioned_khorshid","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased_finetuned_conditioned_khorshid", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased_finetuned_conditioned_khorshid| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.3 MB| + +## References + +https://huggingface.co/Hamid-reza/bert-base-parsbert-uncased-finetuned-conditioned-khorshid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en.md new file mode 100644 index 00000000000000..854f5c741285c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased_finetuned_khorshid_accelerate BertEmbeddings from Hamid-reza +author: John Snow Labs +name: bert_base_parsbert_uncased_finetuned_khorshid_accelerate +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased_finetuned_khorshid_accelerate` is an English model originally trained by Hamid-reza.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en_5.1.1_3.0_1694551419310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en_5.1.1_3.0_1694551419310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased_finetuned_khorshid_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased_finetuned_khorshid_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased_finetuned_khorshid_accelerate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.3 MB| + +## References + +https://huggingface.co/Hamid-reza/bert-base-parsbert-uncased-finetuned-khorshid-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_en.md new file mode 100644 index 00000000000000..dd74fe05f010f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased_finetuned_khorshid BertEmbeddings from Hamid-reza +author: John Snow Labs +name: bert_base_parsbert_uncased_finetuned_khorshid +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased_finetuned_khorshid` is an English model originally trained by Hamid-reza.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_en_5.1.1_3.0_1694551198123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_en_5.1.1_3.0_1694551198123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased_finetuned_khorshid","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased_finetuned_khorshid", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased_finetuned_khorshid| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.4 MB| + +## References + +https://huggingface.co/Hamid-reza/bert-base-parsbert-uncased-finetuned-khorshid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_finetuned_acordao_v2_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_finetuned_acordao_v2_en.md new file mode 100644 index 00000000000000..44b9287425c3f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_finetuned_acordao_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_portuguese_cased_finetuned_acordao_v2 BertEmbeddings from ederkamphorst +author: John Snow Labs +name: bert_base_portuguese_cased_finetuned_acordao_v2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_portuguese_cased_finetuned_acordao_v2` is an English model originally trained by ederkamphorst.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_acordao_v2_en_5.1.1_3.0_1694508805037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_acordao_v2_en_5.1.1_3.0_1694508805037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_finetuned_acordao_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_finetuned_acordao_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_finetuned_acordao_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ederkamphorst/bert-base-portuguese-cased-finetuned-acordao_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_neuralmind_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_neuralmind_pt.md new file mode 100644 index 00000000000000..edd95378a27a10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_neuralmind_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_base_portuguese_cased_neuralmind BertEmbeddings from neuralmind +author: John Snow Labs +name: bert_base_portuguese_cased_neuralmind +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_portuguese_cased_neuralmind` is a Portuguese model originally trained by neuralmind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_neuralmind_pt_5.1.1_3.0_1694557881484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_neuralmind_pt_5.1.1_3.0_1694557881484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_neuralmind","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_neuralmind", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_neuralmind| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/neuralmind/bert-base-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_swedish_cased_kb_sv.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_swedish_cased_kb_sv.md new file mode 100644 index 00000000000000..23250a730a4ea3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_swedish_cased_kb_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish bert_base_swedish_cased_kb BertEmbeddings from KB +author: John Snow Labs +name: bert_base_swedish_cased_kb +date: 2023-09-12 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_swedish_cased_kb` is a Swedish model originally trained by KB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kb_sv_5.1.1_3.0_1694510194246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kb_sv_5.1.1_3.0_1694510194246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_cased_kb","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_swedish_cased_kb", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_cased_kb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|465.2 MB| + +## References + +https://huggingface.co/KB/bert-base-swedish-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_tapt_govreport_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_tapt_govreport_en.md new file mode 100644 index 00000000000000..a31f17cef5f6b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_tapt_govreport_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_tapt_govreport BertEmbeddings from eliolio +author: John Snow Labs +name: bert_base_tapt_govreport +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_tapt_govreport` is a English model originally trained by eliolio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_tapt_govreport_en_5.1.1_3.0_1694562890971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_tapt_govreport_en_5.1.1_3.0_1694562890971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_tapt_govreport","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_tapt_govreport", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_tapt_govreport| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/eliolio/bert-base-tapt-govreport \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_echr_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_echr_en.md new file mode 100644 index 00000000000000..fc5fa653d2dea5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_echr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_echr BertEmbeddings from nlpaueb +author: John Snow Labs +name: bert_base_uncased_echr +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_echr` is a English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_echr_en_5.1.1_3.0_1694561249416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_echr_en_5.1.1_3.0_1694561249416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_echr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_echr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_echr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-uncased-echr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_englishlawai_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_englishlawai_en.md new file mode 100644 index 00000000000000..02d277b3a73897 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_englishlawai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_englishlawai BertEmbeddings from Makabaka +author: John Snow Labs +name: bert_base_uncased_englishlawai +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_englishlawai` is a English model originally trained by Makabaka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_englishlawai_en_5.1.1_3.0_1694550163867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_englishlawai_en_5.1.1_3.0_1694550163867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_englishlawai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_englishlawai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_englishlawai| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Makabaka/bert-base-uncased-EnglishLawAI \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_eurlex_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_eurlex_en.md new file mode 100644 index 00000000000000..9460a87cc03e1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_eurlex_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_eurlex BertEmbeddings from nlpaueb +author: John Snow Labs +name: bert_base_uncased_eurlex +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_eurlex` is a English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_eurlex_en_5.1.1_3.0_1694561402834.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_eurlex_en_5.1.1_3.0_1694561402834.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_eurlex","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_eurlex", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_eurlex| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-uncased-eurlex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_academic_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_academic_en.md new file mode 100644 index 00000000000000..00b7d46ae5899b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_academic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_academic BertEmbeddings from egumasa +author: John Snow Labs +name: bert_base_uncased_finetuned_academic +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_academic` is a English model originally trained by egumasa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_academic_en_5.1.1_3.0_1694555134262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_academic_en_5.1.1_3.0_1694555134262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_academic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_academic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_academic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/egumasa/bert-base-uncased-finetuned-academic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_crypto_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_crypto_en.md new file mode 100644 index 00000000000000..5d06344969a462 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_crypto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_crypto BertEmbeddings from smarquie +author: John Snow Labs +name: bert_base_uncased_finetuned_crypto +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_crypto` is a English model originally trained by smarquie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_crypto_en_5.1.1_3.0_1694562492423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_crypto_en_5.1.1_3.0_1694562492423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_crypto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_crypto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_crypto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/smarquie/bert-base-uncased-finetuned-crypto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_gujarati_128_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_gujarati_128_en.md new file mode 100644 index 00000000000000..c74ee99a926635 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_gujarati_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_gujarati_128 BertEmbeddings from mischi001 +author: John Snow Labs +name: bert_base_uncased_gujarati_128 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_gujarati_128` is an English model originally trained by mischi001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_gujarati_128_en_5.1.1_3.0_1694510645737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_gujarati_128_en_5.1.1_3.0_1694510645737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_gujarati_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_gujarati_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_gujarati_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mischi001/bert-base-uncased-gu-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_dongyeop_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_dongyeop_en.md new file mode 100644 index 00000000000000..7ed36aba92f77e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_dongyeop_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_dongyeop BertEmbeddings from Dongyeop +author: John Snow Labs +name: bert_base_uncased_issues_128_dongyeop +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_dongyeop` is an English model originally trained by Dongyeop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_dongyeop_en_5.1.1_3.0_1694510297896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_dongyeop_en_5.1.1_3.0_1694510297896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_dongyeop","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_dongyeop", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_dongyeop| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Dongyeop/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_hrayrmsint_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_hrayrmsint_en.md new file mode 100644 index 00000000000000..27bf3aaa32a0d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_hrayrmsint_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_hrayrmsint BertEmbeddings from HrayrMSint +author: John Snow Labs +name: bert_base_uncased_issues_128_hrayrmsint +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_hrayrmsint` is an English model originally trained by HrayrMSint. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hrayrmsint_en_5.1.1_3.0_1694549539463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hrayrmsint_en_5.1.1_3.0_1694549539463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_hrayrmsint","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_hrayrmsint", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_hrayrmsint| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/HrayrMSint/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_isaacp_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_isaacp_en.md new file mode 100644 index 00000000000000..e1cc7c79e1f8da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_isaacp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_isaacp BertEmbeddings from Isaacp +author: John Snow Labs +name: bert_base_uncased_issues_128_isaacp +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_isaacp` is an English model originally trained by Isaacp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_isaacp_en_5.1.1_3.0_1694508451060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_isaacp_en_5.1.1_3.0_1694508451060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_isaacp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_isaacp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_isaacp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Isaacp/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_issues_128_lvwerra_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_issues_128_lvwerra_en.md new file mode 100644 index 00000000000000..38347519819ad8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_issues_128_lvwerra_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_issues_128_lvwerra BertEmbeddings from lvwerra +author: John Snow Labs +name: bert_base_uncased_issues_128_issues_128_lvwerra +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_issues_128_lvwerra` is an English model originally trained by lvwerra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_lvwerra_en_5.1.1_3.0_1694550270689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_lvwerra_en_5.1.1_3.0_1694550270689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_issues_128_lvwerra","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_issues_128_lvwerra", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_issues_128_lvwerra| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/lvwerra/bert-base-uncased-issues-128-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jangmin_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jangmin_en.md new file mode 100644 index 00000000000000..f1ad86721b3e5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jangmin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_jangmin BertEmbeddings from jangmin +author: John Snow Labs +name: bert_base_uncased_issues_128_jangmin +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_jangmin` is an English model originally trained by jangmin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jangmin_en_5.1.1_3.0_1694554761560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jangmin_en_5.1.1_3.0_1694554761560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_jangmin","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_jangmin", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_jangmin| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jangmin/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_junghun_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_junghun_en.md new file mode 100644 index 00000000000000..e259aa105c6eb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_junghun_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_junghun BertEmbeddings from JungHun +author: John Snow Labs +name: bert_base_uncased_issues_128_junghun +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_junghun` is an English model originally trained by JungHun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_junghun_en_5.1.1_3.0_1694556714538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_junghun_en_5.1.1_3.0_1694556714538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_junghun","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_junghun", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_junghun| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/JungHun/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jx7789_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jx7789_en.md new file mode 100644 index 00000000000000..315c9204483781 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jx7789_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_jx7789 BertEmbeddings from jx7789 +author: John Snow Labs +name: bert_base_uncased_issues_128_jx7789 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_jx7789` is an English model originally trained by jx7789. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jx7789_en_5.1.1_3.0_1694555592115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jx7789_en_5.1.1_3.0_1694555592115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_jx7789","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_jx7789", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_jx7789| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jx7789/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_transll_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_transll_en.md new file mode 100644 index 00000000000000..48de853cec6289 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_transll_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_transll BertEmbeddings from TransLL +author: John Snow Labs +name: bert_base_uncased_issues_128_transll +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_transll` is an English model originally trained by TransLL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_transll_en_5.1.1_3.0_1694550129029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_transll_en_5.1.1_3.0_1694550129029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_transll","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_transll", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_transll| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/TransLL/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en.md new file mode 100644 index 00000000000000..abe6f9dcf72fe4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier` is an English model originally trained by Intel. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en_5.1.1_3.0_1694559998864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en_5.1.1_3.0_1694559998864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|225.9 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-mnli-sparse-70-unstructured-no-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_rahuldave_issues_128_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_rahuldave_issues_128_en.md new file mode 100644 index 00000000000000..9357261bb64a02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_rahuldave_issues_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_rahuldave_issues_128 BertEmbeddings from rahuldave +author: John Snow Labs +name: bert_base_uncased_rahuldave_issues_128 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_rahuldave_issues_128` is an English model originally trained by rahuldave. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rahuldave_issues_128_en_5.1.1_3.0_1694553801165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rahuldave_issues_128_en_5.1.1_3.0_1694553801165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_rahuldave_issues_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_rahuldave_issues_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_rahuldave_issues_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/rahuldave/bert-base-uncased-rahuldave-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_70_unstructured_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_70_unstructured_en.md new file mode 100644 index 00000000000000..f8578c727b7524 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_70_unstructured_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_sparse_70_unstructured BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_sparse_70_unstructured +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_sparse_70_unstructured` is a English model originally trained by Intel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_70_unstructured_en_5.1.1_3.0_1694560206490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_70_unstructured_en_5.1.1_3.0_1694560206490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_sparse_70_unstructured","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_sparse_70_unstructured", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sparse_70_unstructured| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|225.8 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-sparse-70-unstructured \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_85_unstructured_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_85_unstructured_pruneofa_en.md new file mode 100644 index 00000000000000..6f7d439896013e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_85_unstructured_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_sparse_85_unstructured_pruneofa BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_sparse_85_unstructured_pruneofa +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sparse_85_unstructured_pruneofa` is an English model originally trained by Intel.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_85_unstructured_pruneofa_en_5.1.1_3.0_1694560356013.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_85_unstructured_pruneofa_en_5.1.1_3.0_1694560356013.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_sparse_85_unstructured_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_sparse_85_unstructured_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sparse_85_unstructured_pruneofa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|175.6 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-sparse-85-unstructured-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_90_unstructured_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_90_unstructured_pruneofa_en.md new file mode 100644 index 00000000000000..4a7bed5ce732b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_90_unstructured_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_sparse_90_unstructured_pruneofa BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_sparse_90_unstructured_pruneofa +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sparse_90_unstructured_pruneofa` is an English model originally trained by Intel.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_90_unstructured_pruneofa_en_5.1.1_3.0_1694560534251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_90_unstructured_pruneofa_en_5.1.1_3.0_1694560534251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_sparse_90_unstructured_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_sparse_90_unstructured_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sparse_90_unstructured_pruneofa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|157.7 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-sparse-90-unstructured-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_transformers_github_128_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_transformers_github_128_en.md new file mode 100644 index 00000000000000..7d61bfb913f5c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_transformers_github_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_transformers_github_128 BertEmbeddings from GV05 +author: John Snow Labs +name: bert_base_uncased_transformers_github_128 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_transformers_github_128` is a English model originally trained by GV05. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_transformers_github_128_en_5.1.1_3.0_1694554427020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_transformers_github_128_en_5.1.1_3.0_1694554427020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_transformers_github_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_transformers_github_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_transformers_github_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/GV05/bert-base-uncased-transformers-github-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_vietnamese_uncased_vi.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_vietnamese_uncased_vi.md new file mode 100644 index 00000000000000..a399d814a7b57f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_vietnamese_uncased_vi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Vietnamese bert_base_vietnamese_uncased BertEmbeddings from tintnguyen +author: John Snow Labs +name: bert_base_vietnamese_uncased +date: 2023-09-12 +tags: [bert, vi, open_source, fill_mask, onnx] +task: Embeddings +language: vi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_vietnamese_uncased` is a Vietnamese model originally trained by tintnguyen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_vietnamese_uncased_vi_5.1.1_3.0_1694551029030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_vietnamese_uncased_vi_5.1.1_3.0_1694551029030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_vietnamese_uncased","vi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_vietnamese_uncased", "vi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_vietnamese_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|vi| +|Size:|536.8 MB| + +## References + +https://huggingface.co/tintnguyen/bert-base-vi-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_wikihow_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_wikihow_en.md new file mode 100644 index 00000000000000..fe2333ebca4769 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_wikihow_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_wikihow BertEmbeddings from Aktsvigun +author: John Snow Labs +name: bert_base_wikihow +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_wikihow` is a English model originally trained by Aktsvigun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_wikihow_en_5.1.1_3.0_1694553949077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_wikihow_en_5.1.1_3.0_1694553949077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_wikihow","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_wikihow", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_wikihow| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Aktsvigun/bert-base-wikihow \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_german_en.md new file mode 100644 index 00000000000000..d96fe467741491 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_c1_english_german BertEmbeddings from OpenSemShift +author: John Snow Labs +name: bert_c1_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_c1_english_german` is a English model originally trained by OpenSemShift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_c1_english_german_en_5.1.1_3.0_1694558764674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_c1_english_german_en_5.1.1_3.0_1694558764674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_c1_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_c1_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_c1_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/OpenSemShift/bert-c1-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_german_en.md new file mode 100644 index 00000000000000..f9bb973d3548b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_c2_english_german BertEmbeddings from OpenSemShift +author: John Snow Labs +name: bert_c2_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_c2_english_german` is a English model originally trained by OpenSemShift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_c2_english_german_en_5.1.1_3.0_1694558981307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_c2_english_german_en_5.1.1_3.0_1694558981307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_c2_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_c2_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_c2_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/OpenSemShift/bert-c2-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_only_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_only_en.md new file mode 100644 index 00000000000000..4ae205f09173fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_only_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_c2_english_only BertEmbeddings from OpenSemShift +author: John Snow Labs +name: bert_c2_english_only +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_c2_english_only` is a English model originally trained by OpenSemShift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_c2_english_only_en_5.1.1_3.0_1694560907370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_c2_english_only_en_5.1.1_3.0_1694560907370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_c2_english_only","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_c2_english_only", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_c2_english_only| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/OpenSemShift/bert-c2-en-only \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_clinical_scratch_wl_spanish_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_clinical_scratch_wl_spanish_en.md new file mode 100644 index 00000000000000..bdee3e9038fe12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_clinical_scratch_wl_spanish_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_clinical_scratch_wl_spanish BertEmbeddings from plncmm +author: John Snow Labs +name: bert_clinical_scratch_wl_spanish +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_clinical_scratch_wl_spanish` is a English model originally trained by plncmm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_clinical_scratch_wl_spanish_en_5.1.1_3.0_1694549252966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_clinical_scratch_wl_spanish_en_5.1.1_3.0_1694549252966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_clinical_scratch_wl_spanish","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_clinical_scratch_wl_spanish", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_clinical_scratch_wl_spanish| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/plncmm/bert-clinical-scratch-wl-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_e_base_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_e_base_mlm_en.md new file mode 100644 index 00000000000000..7516ad99fa0fdc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_e_base_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_e_base_mlm BertEmbeddings from nasa-impact +author: John Snow Labs +name: bert_e_base_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_e_base_mlm` is a English model originally trained by nasa-impact. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_e_base_mlm_en_5.1.1_3.0_1694557364630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_e_base_mlm_en_5.1.1_3.0_1694557364630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_e_base_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_e_base_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_e_base_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/nasa-impact/bert-e-base-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_emoji_latvian_twitter_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_emoji_latvian_twitter_en.md new file mode 100644 index 00000000000000..ead21b98623a7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_emoji_latvian_twitter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_emoji_latvian_twitter BertEmbeddings from FFZG-cleopatra +author: John Snow Labs +name: bert_emoji_latvian_twitter +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_emoji_latvian_twitter` is a English model originally trained by FFZG-cleopatra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_emoji_latvian_twitter_en_5.1.1_3.0_1694548098064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_emoji_latvian_twitter_en_5.1.1_3.0_1694548098064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_emoji_latvian_twitter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_emoji_latvian_twitter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_emoji_latvian_twitter| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.2 MB| + +## References + +https://huggingface.co/FFZG-cleopatra/bert-emoji-latvian-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_finetuning_test_lian01110_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_finetuning_test_lian01110_en.md new file mode 100644 index 00000000000000..a6be7241b26233 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_finetuning_test_lian01110_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_lian01110 BertEmbeddings from lian01110 +author: John Snow Labs +name: bert_finetuning_test_lian01110 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_lian01110` is an English model originally trained by lian01110. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_lian01110_en_5.1.1_3.0_1694548658763.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_lian01110_en_5.1.1_3.0_1694548658763.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetuning_test_lian01110","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_lian01110", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_lian01110| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lian01110/bert_finetuning_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_for_finacial_triples_completion_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_for_finacial_triples_completion_en.md new file mode 100644 index 00000000000000..10a56529d1883d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_for_finacial_triples_completion_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_for_finacial_triples_completion BertEmbeddings from reginaboateng +author: John Snow Labs +name: bert_for_finacial_triples_completion +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_for_finacial_triples_completion` is an English model originally trained by reginaboateng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_for_finacial_triples_completion_en_5.1.1_3.0_1694549182533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_for_finacial_triples_completion_en_5.1.1_3.0_1694549182533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_for_finacial_triples_completion","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_for_finacial_triples_completion", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_for_finacial_triples_completion| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/reginaboateng/bert_for_finacial_triples_completion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_funting_test_ai10_niepan_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_funting_test_ai10_niepan_en.md new file mode 100644 index 00000000000000..2ac0ed5a6a7c05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_funting_test_ai10_niepan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_funting_test_ai10_niepan BertEmbeddings from niepan +author: John Snow Labs +name: bert_funting_test_ai10_niepan +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_funting_test_ai10_niepan` is an English model originally trained by niepan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_funting_test_ai10_niepan_en_5.1.1_3.0_1694558288937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_funting_test_ai10_niepan_en_5.1.1_3.0_1694558288937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_funting_test_ai10_niepan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_funting_test_ai10_niepan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_funting_test_ai10_niepan| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/niepan/bert_funting_test_ai10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_galician_gl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_galician_gl.md new file mode 100644 index 00000000000000..c45ebe51884458 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_galician_gl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Galician bert_galician BertEmbeddings from fpuentes +author: John Snow Labs +name: bert_galician +date: 2023-09-12 +tags: [bert, gl, open_source, fill_mask, onnx] +task: Embeddings +language: gl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_galician` is a Galician model originally trained by fpuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_galician_gl_5.1.1_3.0_1694550784049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_galician_gl_5.1.1_3.0_1694550784049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_galician","gl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_galician", "gl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_galician| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gl| +|Size:|409.0 MB| + +## References + +https://huggingface.co/fpuentes/bert-galician \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_hateful_memes_expanded_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_hateful_memes_expanded_en.md new file mode 100644 index 00000000000000..59b1251b4d0c10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_hateful_memes_expanded_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_hateful_memes_expanded BertEmbeddings from limjiayi +author: John Snow Labs +name: bert_hateful_memes_expanded +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_hateful_memes_expanded` is an English model originally trained by limjiayi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_hateful_memes_expanded_en_5.1.1_3.0_1694548862078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_hateful_memes_expanded_en_5.1.1_3.0_1694548862078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_hateful_memes_expanded","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_hateful_memes_expanded", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_hateful_memes_expanded| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/limjiayi/bert-hateful-memes-expanded \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_java_bfp_combined_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_java_bfp_combined_en.md new file mode 100644 index 00000000000000..ef3ec9b7562b4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_java_bfp_combined_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_java_bfp_combined BertEmbeddings from up201806461 +author: John Snow Labs +name: bert_java_bfp_combined +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_java_bfp_combined` is an English model originally trained by up201806461. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_java_bfp_combined_en_5.1.1_3.0_1694562355805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_java_bfp_combined_en_5.1.1_3.0_1694562355805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_java_bfp_combined","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_java_bfp_combined", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_java_bfp_combined| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/up201806461/bert-java-bfp_combined \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_en.md new file mode 100644 index 00000000000000..09a77ee8a25bf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_base_uncased BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_khmer_base_uncased` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_en_5.1.1_3.0_1694548567530.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_en_5.1.1_3.0_1694548567530.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_khmer_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|413.0 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_tokenized_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_tokenized_en.md new file mode 100644 index 00000000000000..065f90f832fbd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_tokenized_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_base_uncased_tokenized BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_base_uncased_tokenized +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_khmer_base_uncased_tokenized` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_tokenized_en_5.1.1_3.0_1694548424527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_tokenized_en_5.1.1_3.0_1694548424527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_khmer_base_uncased_tokenized","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_base_uncased_tokenized", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_base_uncased_tokenized| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.9 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-base-uncased-tokenized \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_en.md new file mode 100644 index 00000000000000..5c0910e0b64983 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_small_uncased BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_small_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_khmer_small_uncased` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_en_5.1.1_3.0_1694548748669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_en_5.1.1_3.0_1694548748669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_khmer_small_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_small_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_small_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|110.2 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-small-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_tokenized_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_tokenized_en.md new file mode 100644 index 00000000000000..b1106d77e59c95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_tokenized_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_small_uncased_tokenized BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_small_uncased_tokenized +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_khmer_small_uncased_tokenized` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_tokenized_en_5.1.1_3.0_1694548658102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_tokenized_en_5.1.1_3.0_1694548658102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_khmer_small_uncased_tokenized","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_small_uncased_tokenized", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_small_uncased_tokenized| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|110.2 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-small-uncased-tokenized \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_korean_base_ko.md b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_base_ko.md new file mode 100644 index 00000000000000..7bbf1ef5e79a2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_base_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean bert_korean_base BertEmbeddings from lassl +author: John Snow Labs +name: bert_korean_base +date: 2023-09-12 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_korean_base` is a Korean model originally trained by lassl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_korean_base_ko_5.1.1_3.0_1694547622935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_korean_base_ko_5.1.1_3.0_1694547622935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_korean_base","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_korean_base", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_korean_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|467.8 MB| + +## References + +https://huggingface.co/lassl/bert-ko-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_korean_small_ko.md b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_small_ko.md new file mode 100644 index 00000000000000..1f09b7aebc3cb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_small_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean bert_korean_small BertEmbeddings from lassl +author: John Snow Labs +name: bert_korean_small +date: 2023-09-12 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_korean_small` is a Korean model originally trained by lassl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_korean_small_ko_5.1.1_3.0_1694547734588.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_korean_small_ko_5.1.1_3.0_1694547734588.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_korean_small","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_korean_small", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_korean_small| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|85.6 MB| + +## References + +https://huggingface.co/lassl/bert-ko-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_english_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_english_en.md new file mode 100644 index 00000000000000..5e62b1afb3b957 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_english_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_english BertEmbeddings from koala +author: John Snow Labs +name: bert_large_cased_english +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_english` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_english_en_5.1.1_3.0_1694508535175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_english_en_5.1.1_3.0_1694508535175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_english","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_english", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_english| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/koala/bert-large-cased-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_finetuned_prompt_20_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_finetuned_prompt_20_en.md new file mode 100644 index 00000000000000..ba14f69c18358a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_finetuned_prompt_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_prompt_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_prompt_20 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_prompt_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_prompt_20_en_5.1.1_3.0_1694560618058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_prompt_20_en_5.1.1_3.0_1694560618058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_prompt_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_prompt_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_prompt_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-prompt-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_portuguese_law_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_portuguese_law_en.md new file mode 100644 index 00000000000000..6bef6ea8d03272 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_portuguese_law_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_portuguese_law BertEmbeddings from edwatanabe +author: John Snow Labs +name: bert_large_cased_portuguese_law +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_portuguese_law` is an English model originally trained by edwatanabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_law_en_5.1.1_3.0_1694548002360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_law_en_5.1.1_3.0_1694548002360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_portuguese_law","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_portuguese_law", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_portuguese_law| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/edwatanabe/bert-large-cased-pt-law \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sclarge_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sclarge_en.md new file mode 100644 index 00000000000000..825ce080100fc7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sclarge_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sclarge BertEmbeddings from ZongqianLi +author: John Snow Labs +name: bert_large_cased_sclarge +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sclarge` is an English model originally trained by ZongqianLi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sclarge_en_5.1.1_3.0_1694562199422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sclarge_en_5.1.1_3.0_1694562199422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sclarge","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sclarge", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sclarge| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/ZongqianLi/bert_large_cased_sclarge \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en.md new file mode 100644 index 00000000000000..8876631c16104c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en_5.1.1_3.0_1694508360095.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en_5.1.1_3.0_1694508360095.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en.md new file mode 100644 index 00000000000000..8136d0ebbe7620 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en_5.1.1_3.0_1694509499566.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en_5.1.1_3.0_1694509499566.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-11 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en.md new file mode 100644 index 00000000000000..1319cc27ddebfc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en_5.1.1_3.0_1694510143392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en_5.1.1_3.0_1694510143392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en.md new file mode 100644 index 00000000000000..638809f31231ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en_5.1.1_3.0_1694510416192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en_5.1.1_3.0_1694510416192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en.md new file mode 100644 index 00000000000000..3ac5584420fff5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en_5.1.1_3.0_1694510686666.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en_5.1.1_3.0_1694510686666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en.md new file mode 100644 index 00000000000000..ccb7015fb49c59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en_5.1.1_3.0_1694547583222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en_5.1.1_3.0_1694547583222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-15 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en.md new file mode 100644 index 00000000000000..569a02cfdcf537 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en_5.1.1_3.0_1694547909959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en_5.1.1_3.0_1694547909959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en.md new file mode 100644 index 00000000000000..efe1f33b166e14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en_5.1.1_3.0_1694548216024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en_5.1.1_3.0_1694548216024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-17 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en.md new file mode 100644 index 00000000000000..41a50555598393 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en_5.1.1_3.0_1694548498129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en_5.1.1_3.0_1694548498129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-18 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en.md new file mode 100644 index 00000000000000..d66544b26af1ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en_5.1.1_3.0_1694548783818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en_5.1.1_3.0_1694548783818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en.md new file mode 100644 index 00000000000000..267efcba9435ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en_5.1.1_3.0_1694508105221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en_5.1.1_3.0_1694508105221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_portuguese_cased_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_portuguese_cased_pt.md new file mode 100644 index 00000000000000..3b5fd7ce34bba3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_portuguese_cased_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_large_portuguese_cased BertEmbeddings from neuralmind +author: John Snow Labs +name: bert_large_portuguese_cased +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_portuguese_cased` is a Portuguese model originally trained by neuralmind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_portuguese_cased_pt_5.1.1_3.0_1694558116929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_portuguese_cased_pt_5.1.1_3.0_1694558116929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_portuguese_cased","pt") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_portuguese_cased", "pt") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/neuralmind/bert-large-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_bengali_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_bengali_en.md new file mode 100644 index 00000000000000..b839e31e6a0881 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_bengali_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_bengali BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_bengali +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_bengali` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_bengali_en_5.1.1_3.0_1694508782600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_bengali_en_5.1.1_3.0_1694508782600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_bengali","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_uncased_bengali", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_bengali| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-bn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_chinese_en.md new file mode 100644 index 00000000000000..4635e55be80a82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_chinese BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_chinese` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_chinese_en_5.1.1_3.0_1694510115547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_chinese_en_5.1.1_3.0_1694510115547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_chinese","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_uncased_chinese", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_english_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_english_en.md new file mode 100644 index 00000000000000..817bdf437e90dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_english_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_english BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_english +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_english` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_english_en_5.1.1_3.0_1694509318989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_english_en_5.1.1_3.0_1694509318989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_english","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_uncased_english", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_english| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_clinc150_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_clinc150_en.md new file mode 100644 index 00000000000000..9bd92193e94568 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_clinc150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_clinc150 BertEmbeddings from FilippoComastri +author: John Snow Labs +name: bert_large_uncased_finetuned_clinc150 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_clinc150` is an English model originally trained by FilippoComastri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_clinc150_en_5.1.1_3.0_1694552611395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_clinc150_en_5.1.1_3.0_1694552611395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ +    .setInputCol("text") \ +    .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_clinc150","en") \ +    .setInputCols(["documents","token"]) \ +    .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("documents") + +val embeddings = BertEmbeddings +    .pretrained("bert_large_uncased_finetuned_clinc150", "en") +    .setInputCols(Array("documents","token")) +    .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_clinc150| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/FilippoComastri/bert-large-uncased-finetuned-clinc150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_20_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_20_en.md new file mode 100644 index 00000000000000..320e405c61ac4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_da_zero_shot_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_da_zero_shot_20 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_da_zero_shot_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_20_en_5.1.1_3.0_1694560241206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_20_en_5.1.1_3.0_1694560241206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_da_zero_shot_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_da_zero_shot_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_da_zero_shot_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-DA-Zero-shot-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_en.md new file mode 100644 index 00000000000000..d519bada948a6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_da_zero_shot BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_da_zero_shot +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_da_zero_shot` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_en_5.1.1_3.0_1694559743511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_en_5.1.1_3.0_1694559743511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_da_zero_shot","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_da_zero_shot", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_da_zero_shot| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-DA-Zero-shot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..77acfb49b7ec77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_imdb BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_imdb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_imdb` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_imdb_en_5.1.1_3.0_1694559416760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_imdb_en_5.1.1_3.0_1694559416760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_youcook_4_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_youcook_4_en.md new file mode 100644 index 00000000000000..50c15d66752ada --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_youcook_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_youcook_4 BertEmbeddings from CennetOguz +author: John Snow Labs +name: bert_large_uncased_finetuned_youcook_4 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_youcook_4` is an English model originally trained by CennetOguz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_youcook_4_en_5.1.1_3.0_1694560592308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_youcook_4_en_5.1.1_3.0_1694560592308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_youcook_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_youcook_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_youcook_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/CennetOguz/bert-large-uncased-finetuned-youcook_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_german_en.md new file mode 100644 index 00000000000000..4df357b4bde208 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_german BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_german` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_german_en_5.1.1_3.0_1694509061010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_german_en_5.1.1_3.0_1694509061010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_hindi_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_hindi_en.md new file mode 100644 index 00000000000000..d47ae7e623ad40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_hindi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_hindi BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_hindi +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_hindi` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_hindi_en_5.1.1_3.0_1694509561973.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_hindi_en_5.1.1_3.0_1694509561973.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_hindi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_hindi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_hindi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-hi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_korean_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_korean_en.md new file mode 100644 index 00000000000000..e450ff959c84e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_korean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_korean BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_korean +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_korean` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_korean_en_5.1.1_3.0_1694509833455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_korean_en_5.1.1_3.0_1694509833455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_korean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_korean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_korean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_ltrc_telugu_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_ltrc_telugu_en.md new file mode 100644 index 00000000000000..009d029dcf02f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_ltrc_telugu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_ltrc_telugu BertEmbeddings from ltrctelugu +author: John Snow Labs +name: bert_ltrc_telugu +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ltrc_telugu` is an English model originally trained by ltrctelugu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ltrc_telugu_en_5.1.1_3.0_1694549907705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ltrc_telugu_en_5.1.1_3.0_1694549907705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_ltrc_telugu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_ltrc_telugu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ltrc_telugu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/ltrctelugu/bert_ltrc_telugu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_persian_poetry_fa.md b/docs/_posts/ahmedlone127/2023-09-12-bert_persian_poetry_fa.md new file mode 100644 index 00000000000000..9155037bc88afe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_persian_poetry_fa.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Persian bert_persian_poetry BertEmbeddings from mitra-mir +author: John Snow Labs +name: bert_persian_poetry +date: 2023-09-12 +tags: [bert, fa, open_source, fill_mask, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_persian_poetry` is a Persian model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_persian_poetry_fa_5.1.1_3.0_1694553964697.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_persian_poetry_fa_5.1.1_3.0_1694553964697.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_persian_poetry","fa") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_persian_poetry", "fa") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_persian_poetry| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fa| +|Size:|441.4 MB| + +## References + +https://huggingface.co/mitra-mir/BERT-Persian-Poetry \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_political_election2020_twitter_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_political_election2020_twitter_mlm_en.md new file mode 100644 index 00000000000000..0a2651b7446a9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_political_election2020_twitter_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_political_election2020_twitter_mlm BertEmbeddings from kornosk +author: John Snow Labs +name: bert_political_election2020_twitter_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_political_election2020_twitter_mlm` is an English model originally trained by kornosk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_political_election2020_twitter_mlm_en_5.1.1_3.0_1694510414367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_political_election2020_twitter_mlm_en_5.1.1_3.0_1694510414367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_political_election2020_twitter_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_political_election2020_twitter_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_political_election2020_twitter_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/kornosk/bert-political-election2020-twitter-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_tagalog_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_tagalog_base_uncased_en.md new file mode 100644 index 00000000000000..f2cdce9badceed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_tagalog_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tagalog_base_uncased BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_tagalog_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tagalog_base_uncased` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tagalog_base_uncased_en_5.1.1_3.0_1694549199633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tagalog_base_uncased_en_5.1.1_3.0_1694549199633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_tagalog_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tagalog_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tagalog_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|469.7 MB| + +## References + +https://huggingface.co/GKLMIP/bert-tagalog-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_test_andychiang_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_test_andychiang_en.md new file mode 100644 index 00000000000000..3cb148360a70e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_test_andychiang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_test_andychiang BertEmbeddings from AndyChiang +author: John Snow Labs +name: bert_test_andychiang +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_test_andychiang` is an English model originally trained by AndyChiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_test_andychiang_en_5.1.1_3.0_1694559088930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_test_andychiang_en_5.1.1_3.0_1694559088930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_test_andychiang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_test_andychiang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_test_andychiang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/AndyChiang/bert-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_finetuned_model_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_finetuned_model_en.md new file mode 100644 index 00000000000000..83cc82471ed04c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_finetuned_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_finetuned_model BertEmbeddings from harvinder676 +author: John Snow Labs +name: bert_uncased_finetuned_model +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_finetuned_model` is an English model originally trained by harvinder676. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_finetuned_model_en_5.1.1_3.0_1694556527374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_finetuned_model_en_5.1.1_3.0_1694556527374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_uncased_finetuned_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_finetuned_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_finetuned_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/harvinder676/bert-uncased-finetuned-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en.md new file mode 100644 index 00000000000000..df43c41eaccefa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_12_h_768_a_12_italian_alb3rt0 BertEmbeddings from m-polignano-uniba +author: John Snow Labs +name: bert_uncased_l_12_h_768_a_12_italian_alb3rt0 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_12_h_768_a_12_italian_alb3rt0` is an English model originally trained by m-polignano-uniba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en_5.1.1_3.0_1694550811678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en_5.1.1_3.0_1694550811678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_uncased_l_12_h_768_a_12_italian_alb3rt0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_12_h_768_a_12_italian_alb3rt0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_12_h_768_a_12_italian_alb3rt0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|689.7 MB| + +## References + +https://huggingface.co/m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_base_uncased_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_base_uncased_pt.md new file mode 100644 index 00000000000000..0140312ebe2270 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_base_uncased_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertabaporu_base_uncased BertEmbeddings from pablocosta +author: John Snow Labs +name: bertabaporu_base_uncased +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertabaporu_base_uncased` is a Portuguese model originally trained by pablocosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertabaporu_base_uncased_pt_5.1.1_3.0_1694556656111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertabaporu_base_uncased_pt_5.1.1_3.0_1694556656111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertabaporu_base_uncased","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertabaporu_base_uncased", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertabaporu_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|504.8 MB| + +## References + +https://huggingface.co/pablocosta/bertabaporu-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_large_uncased_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_large_uncased_pt.md new file mode 100644 index 00000000000000..f530999d960781 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_large_uncased_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertabaporu_large_uncased BertEmbeddings from pablocosta +author: John Snow Labs +name: bertabaporu_large_uncased +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertabaporu_large_uncased` is a Portuguese model originally trained by pablocosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertabaporu_large_uncased_pt_5.1.1_3.0_1694556951243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertabaporu_large_uncased_pt_5.1.1_3.0_1694556951243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertabaporu_large_uncased","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertabaporu_large_uncased", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertabaporu_large_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.4 GB| + +## References + +https://huggingface.co/pablocosta/bertabaporu-large-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_1e_en.md new file mode 100644 index 00000000000000..c433c7b1c1cdf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbase_ug_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbase_ug_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbase_ug_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbase_ug_1e_en_5.1.1_3.0_1694559625780.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbase_ug_1e_en_5.1.1_3.0_1694559625780.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbase_ug_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbase_ug_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbase_ug_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBase_UG_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_2e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_2e_en.md new file mode 100644 index 00000000000000..7bdf0ba53b5e80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_2e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbase_ug_2e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbase_ug_2e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbase_ug_2e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbase_ug_2e_en_5.1.1_3.0_1694559815371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbase_ug_2e_en_5.1.1_3.0_1694559815371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbase_ug_2e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbase_ug_2e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbase_ug_2e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.6 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBase_UG_2e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_3e_en.md new file mode 100644 index 00000000000000..064c1de630ce77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbase_ug_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbase_ug_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbase_ug_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbase_ug_3e_en_5.1.1_3.0_1694559989794.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbase_ug_3e_en_5.1.1_3.0_1694559989794.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbase_ug_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbase_ug_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbase_ug_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|492.9 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBase_UG_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_1e_en.md new file mode 100644 index 00000000000000..adfb015f86eb3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbasekk_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbasekk_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbasekk_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbasekk_1e_en_5.1.1_3.0_1694553907199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbasekk_1e_en_5.1.1_3.0_1694553907199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbasekk_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbasekk_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbasekk_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBaseKK_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_2e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_2e_en.md new file mode 100644 index 00000000000000..cd439282d3971e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_2e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbasekk_2e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbasekk_2e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbasekk_2e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbasekk_2e_en_5.1.1_3.0_1694558294650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbasekk_2e_en_5.1.1_3.0_1694558294650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbasekk_2e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbasekk_2e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbasekk_2e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.5 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBaseKK_2e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_3e_en.md new file mode 100644 index 00000000000000..664ccdcda5be16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbasekk_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbasekk_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbasekk_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbasekk_3e_en_5.1.1_3.0_1694558506984.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbasekk_3e_en_5.1.1_3.0_1694558506984.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbasekk_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbasekk_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbasekk_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|492.9 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBaseKK_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertimbaulaw_base_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertimbaulaw_base_portuguese_cased_en.md new file mode 100644 index 00000000000000..2860b20b5c3843 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertimbaulaw_base_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertimbaulaw_base_portuguese_cased BertEmbeddings from alfaneo +author: John Snow Labs +name: bertimbaulaw_base_portuguese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertimbaulaw_base_portuguese_cased` is an English model originally trained by alfaneo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbaulaw_base_portuguese_cased_en_5.1.1_3.0_1694557285783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbaulaw_base_portuguese_cased_en_5.1.1_3.0_1694557285783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertimbaulaw_base_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertimbaulaw_base_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbaulaw_base_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.8 MB| + +## References + +https://huggingface.co/alfaneo/bertimbaulaw-base-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdata_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdata_en.md new file mode 100644 index 00000000000000..5f86509ecf8c0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdata_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdata BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdata +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdata` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdata_en_5.1.1_3.0_1694561421127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdata_en_5.1.1_3.0_1694561421127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertjewdialdata","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdata", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdata| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialData \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall03_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall03_en.md new file mode 100644 index 00000000000000..c3596f32f058aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall03_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataall03 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataall03 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataall03` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataall03_en_5.1.1_3.0_1694561834775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataall03_en_5.1.1_3.0_1694561834775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataall03","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataall03", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataall03| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALL03 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly02_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly02_en.md new file mode 100644 index 00000000000000..746f768027b890 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly02_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly02 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly02 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly02` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly02_en_5.1.1_3.0_1694562283503.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly02_en_5.1.1_3.0_1694562283503.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly02","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly02", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly03_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly03_en.md new file mode 100644 index 00000000000000..a6fa8c74e184dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly03_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly03 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly03 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly03` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly03_en_5.1.1_3.0_1694562426971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly03_en_5.1.1_3.0_1694562426971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly03","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly03", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly03| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly03 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly05_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly05_en.md new file mode 100644 index 00000000000000..189dd7a9d07187 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly05_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly05 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly05 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly05` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly05_en_5.1.1_3.0_1694562715391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly05_en_5.1.1_3.0_1694562715391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly05","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly05", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly05| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly05 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly06_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly06_en.md new file mode 100644 index 00000000000000..b4e3b5cb261d20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly06_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly06 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly06 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly06` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly06_en_5.1.1_3.0_1694562884246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly06_en_5.1.1_3.0_1694562884246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly06","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly06", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly06| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly06 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly07_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly07_en.md new file mode 100644 index 00000000000000..2dfc8a0c42b127 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly07_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly07 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly07 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly07` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly07_en_5.1.1_3.0_1694563003244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly07_en_5.1.1_3.0_1694563003244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly07","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly07", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly07| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly07 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly08_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly08_en.md new file mode 100644 index 00000000000000..d82bd962260441 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly08_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly08 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly08 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly08` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly08_en_5.1.1_3.0_1694563138956.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly08_en_5.1.1_3.0_1694563138956.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly08","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly08", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly08| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly08 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly_en.md new file mode 100644 index 00000000000000..80c0ad1a0239c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly_en_5.1.1_3.0_1694562127211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly_en_5.1.1_3.0_1694562127211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-beto_clinical_wl_spanish_es.md b/docs/_posts/ahmedlone127/2023-09-12-beto_clinical_wl_spanish_es.md new file mode 100644 index 00000000000000..a8541213471dab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-beto_clinical_wl_spanish_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish beto_clinical_wl_spanish BertEmbeddings from plncmm +author: John Snow Labs +name: beto_clinical_wl_spanish +date: 2023-09-12 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `beto_clinical_wl_spanish` is a Castilian, Spanish model originally trained by plncmm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_clinical_wl_spanish_es_5.1.1_3.0_1694510336701.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_clinical_wl_spanish_es_5.1.1_3.0_1694510336701.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("beto_clinical_wl_spanish","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("beto_clinical_wl_spanish", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_clinical_wl_spanish| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.7 MB| + +## References + +https://huggingface.co/plncmm/beto-clinical-wl-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bio_bert_base_spanish_wwm_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bio_bert_base_spanish_wwm_cased_en.md new file mode 100644 index 00000000000000..bbc180b3cb44a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bio_bert_base_spanish_wwm_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_bert_base_spanish_wwm_cased BertEmbeddings from mrojas +author: John Snow Labs +name: bio_bert_base_spanish_wwm_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_bert_base_spanish_wwm_cased` is an English model originally trained by mrojas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_bert_base_spanish_wwm_cased_en_5.1.1_3.0_1694556544912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_bert_base_spanish_wwm_cased_en_5.1.1_3.0_1694556544912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_bert_base_spanish_wwm_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_bert_base_spanish_wwm_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_bert_base_spanish_wwm_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.0 MB| + +## References + +https://huggingface.co/mrojas/bio-bert-base-spanish-wwm-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biobert_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-biobert_italian_en.md new file mode 100644 index 00000000000000..a0dc73da5299ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biobert_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_italian BertEmbeddings from marcopost-it +author: John Snow Labs +name: biobert_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_italian` is an English model originally trained by marcopost-it. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_italian_en_5.1.1_3.0_1694551323419.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_italian_en_5.1.1_3.0_1694551323419.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.2 MB| + +## References + +https://huggingface.co/marcopost-it/biobert-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biobert_patent_reference_extraction_en.md b/docs/_posts/ahmedlone127/2023-09-12-biobert_patent_reference_extraction_en.md new file mode 100644 index 00000000000000..3b583aca84bfbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biobert_patent_reference_extraction_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_patent_reference_extraction BertEmbeddings from kaesve +author: John Snow Labs +name: biobert_patent_reference_extraction +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_patent_reference_extraction` is an English model originally trained by kaesve. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_patent_reference_extraction_en_5.1.1_3.0_1694507994597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_patent_reference_extraction_en_5.1.1_3.0_1694507994597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_patent_reference_extraction","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_patent_reference_extraction", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_patent_reference_extraction| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/kaesve/BioBERT_patent_reference_extraction \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biobert_v1.1_pubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-biobert_v1.1_pubmed_en.md new file mode 100644 index 00000000000000..b6795b741d65a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biobert_v1.1_pubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_v1.1_pubmed BertEmbeddings from monologg +author: John Snow Labs +name: biobert_v1.1_pubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_v1.1_pubmed` is an English model originally trained by monologg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_v1.1_pubmed_en_5.1.1_3.0_1694554769599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_v1.1_pubmed_en_5.1.1_3.0_1694554769599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_v1.1_pubmed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_v1.1_pubmed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_v1.1_pubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/monologg/biobert_v1.1_pubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bioclinicalbert_finetuned_covid_papers_en.md b/docs/_posts/ahmedlone127/2023-09-12-bioclinicalbert_finetuned_covid_papers_en.md new file mode 100644 index 00000000000000..512beedd3752f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bioclinicalbert_finetuned_covid_papers_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioclinicalbert_finetuned_covid_papers BertEmbeddings from mrm8488 +author: John Snow Labs +name: bioclinicalbert_finetuned_covid_papers +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bioclinicalbert_finetuned_covid_papers` is an English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioclinicalbert_finetuned_covid_papers_en_5.1.1_3.0_1694556257175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioclinicalbert_finetuned_covid_papers_en_5.1.1_3.0_1694556257175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bioclinicalbert_finetuned_covid_papers","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bioclinicalbert_finetuned_covid_papers", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioclinicalbert_finetuned_covid_papers| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.2 MB| + +## References + +https://huggingface.co/mrm8488/bioclinicalBERT-finetuned-covid-papers \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_en.md b/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_en.md new file mode 100644 index 00000000000000..423700e0cb5575 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biomednlp_pubmedbert_base_uncased_abstract BertEmbeddings from microsoft +author: John Snow Labs +name: biomednlp_pubmedbert_base_uncased_abstract +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomednlp_pubmedbert_base_uncased_abstract` is an English model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_en_5.1.1_3.0_1694553564368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_en_5.1.1_3.0_1694553564368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biomednlp_pubmedbert_base_uncased_abstract","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biomednlp_pubmedbert_base_uncased_abstract", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_base_uncased_abstract| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bodo_bert_mlm_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-bodo_bert_mlm_base_en.md new file mode 100644 index 00000000000000..b20ee811c39217 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bodo_bert_mlm_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bodo_bert_mlm_base BertEmbeddings from alayaran +author: John Snow Labs +name: bodo_bert_mlm_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bodo_bert_mlm_base` is an English model originally trained by alayaran. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bodo_bert_mlm_base_en_5.1.1_3.0_1694509384576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bodo_bert_mlm_base_en_5.1.1_3.0_1694509384576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bodo_bert_mlm_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bodo_bert_mlm_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bodo_bert_mlm_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/alayaran/bodo-bert-mlm-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-burmese_bert_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-burmese_bert_mlm_en.md new file mode 100644 index 00000000000000..9cbed457538a52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-burmese_bert_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_bert_mlm BertEmbeddings from minn +author: John Snow Labs +name: burmese_bert_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_bert_mlm` is an English model originally trained by minn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_bert_mlm_en_5.1.1_3.0_1694553778330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_bert_mlm_en_5.1.1_3.0_1694553778330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("burmese_bert_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_bert_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_bert_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/minn/my-bert-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_note_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_note_en.md new file mode 100644 index 00000000000000..ffb6856020bd46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_note_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicaladaptation_pubmedbert_base_uncased_mimic_note BertEmbeddings from jhliu +author: John Snow Labs +name: clinicaladaptation_pubmedbert_base_uncased_mimic_note +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicaladaptation_pubmedbert_base_uncased_mimic_note` is an English model originally trained by jhliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_note_en_5.1.1_3.0_1694550817266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_note_en_5.1.1_3.0_1694550817266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_note","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_note", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicaladaptation_pubmedbert_base_uncased_mimic_note| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/jhliu/ClinicalAdaptation-PubMedBERT-base-uncased-MIMIC-note \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en.md new file mode 100644 index 00000000000000..9b4dc43c91f411 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicaladaptation_pubmedbert_base_uncased_mimic_segment BertEmbeddings from jhliu +author: John Snow Labs +name: clinicaladaptation_pubmedbert_base_uncased_mimic_segment +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicaladaptation_pubmedbert_base_uncased_mimic_segment` is an English model originally trained by jhliu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en_5.1.1_3.0_1694550645740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en_5.1.1_3.0_1694550645740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_segment","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_segment", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicaladaptation_pubmedbert_base_uncased_mimic_segment| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/jhliu/ClinicalAdaptation-PubMedBERT-base-uncased-MIMIC-segment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en.md new file mode 100644 index 00000000000000..e4b370235616db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicaladaptation_pubmedbert_base_uncased_mimic_sentence BertEmbeddings from jhliu +author: John Snow Labs +name: clinicaladaptation_pubmedbert_base_uncased_mimic_sentence +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicaladaptation_pubmedbert_base_uncased_mimic_sentence` is an English model originally trained by jhliu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en_5.1.1_3.0_1694550482919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en_5.1.1_3.0_1694550482919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_sentence","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_sentence", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicaladaptation_pubmedbert_base_uncased_mimic_sentence| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/jhliu/ClinicalAdaptation-PubMedBERT-base-uncased-MIMIC-sentence \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicalnotebert_base_uncased_mimic_segment_note_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicalnotebert_base_uncased_mimic_segment_note_en.md new file mode 100644 index 00000000000000..b8f6d52173905d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicalnotebert_base_uncased_mimic_segment_note_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicalnotebert_base_uncased_mimic_segment_note BertEmbeddings from jhliu +author: John Snow Labs +name: clinicalnotebert_base_uncased_mimic_segment_note +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicalnotebert_base_uncased_mimic_segment_note` is an English model originally trained by jhliu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalnotebert_base_uncased_mimic_segment_note_en_5.1.1_3.0_1694550310365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalnotebert_base_uncased_mimic_segment_note_en_5.1.1_3.0_1694550310365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicalnotebert_base_uncased_mimic_segment_note","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicalnotebert_base_uncased_mimic_segment_note", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalnotebert_base_uncased_mimic_segment_note| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/jhliu/ClinicalNoteBERT-base-uncased-MIMIC-segment-note \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-covid19_fake_news_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-covid19_fake_news_bert_uncased_en.md new file mode 100644 index 00000000000000..a6af0e72f2bd6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-covid19_fake_news_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English covid19_fake_news_bert_uncased BertEmbeddings from Jawaher +author: John Snow Labs +name: covid19_fake_news_bert_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid19_fake_news_bert_uncased` is an English model originally trained by Jawaher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid19_fake_news_bert_uncased_en_5.1.1_3.0_1694510515872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid19_fake_news_bert_uncased_en_5.1.1_3.0_1694510515872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("covid19_fake_news_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("covid19_fake_news_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid19_fake_news_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Jawaher/Covid19-fake-news-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-covid_scibert_en.md b/docs/_posts/ahmedlone127/2023-09-12-covid_scibert_en.md new file mode 100644 index 00000000000000..75b293db51c7a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-covid_scibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English covid_scibert BertEmbeddings from lordtt13 +author: John Snow Labs +name: covid_scibert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid_scibert` is an English model originally trained by lordtt13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_scibert_en_5.1.1_3.0_1694549378978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_scibert_en_5.1.1_3.0_1694549378978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("covid_scibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("covid_scibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_scibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/lordtt13/COVID-SciBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-covid_vaccine_twitter_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-covid_vaccine_twitter_bert_en.md new file mode 100644 index 00000000000000..2189a1b689145b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-covid_vaccine_twitter_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English covid_vaccine_twitter_bert BertEmbeddings from GateNLP +author: John Snow Labs +name: covid_vaccine_twitter_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid_vaccine_twitter_bert` is an English model originally trained by GateNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_vaccine_twitter_bert_en_5.1.1_3.0_1694553221558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_vaccine_twitter_bert_en_5.1.1_3.0_1694553221558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("covid_vaccine_twitter_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("covid_vaccine_twitter_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_vaccine_twitter_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/GateNLP/covid-vaccine-twitter-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-cro_cov_csebert_en.md b/docs/_posts/ahmedlone127/2023-09-12-cro_cov_csebert_en.md new file mode 100644 index 00000000000000..a82e91d1fe4222 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-cro_cov_csebert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cro_cov_csebert BertEmbeddings from InfoCoV +author: John Snow Labs +name: cro_cov_csebert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cro_cov_csebert` is an English model originally trained by InfoCoV. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cro_cov_csebert_en_5.1.1_3.0_1694559840041.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cro_cov_csebert_en_5.1.1_3.0_1694559840041.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cro_cov_csebert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cro_cov_csebert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cro_cov_csebert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|463.4 MB| + +## References + +https://huggingface.co/InfoCoV/Cro-CoV-cseBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-crosloengual_bert_hr.md b/docs/_posts/ahmedlone127/2023-09-12-crosloengual_bert_hr.md new file mode 100644 index 00000000000000..f3a4c1d30af803 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-crosloengual_bert_hr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Croatian crosloengual_bert BertEmbeddings from EMBEDDIA +author: John Snow Labs +name: crosloengual_bert +date: 2023-09-12 +tags: [bert, hr, open_source, fill_mask, onnx] +task: Embeddings +language: hr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `crosloengual_bert` is a Croatian model originally trained by EMBEDDIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crosloengual_bert_hr_5.1.1_3.0_1694509284745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crosloengual_bert_hr_5.1.1_3.0_1694509284745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("crosloengual_bert","hr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("crosloengual_bert", "hr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crosloengual_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hr| +|Size:|463.4 MB| + +## References + +https://huggingface.co/EMBEDDIA/crosloengual-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-custominlawbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-custominlawbert_en.md new file mode 100644 index 00000000000000..7cfed09f199915 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-custominlawbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English custominlawbert BertEmbeddings from law-ai +author: John Snow Labs +name: custominlawbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `custominlawbert` is an English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/custominlawbert_en_5.1.1_3.0_1694553237351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/custominlawbert_en_5.1.1_3.0_1694553237351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("custominlawbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("custominlawbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|custominlawbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.5 MB| + +## References + +https://huggingface.co/law-ai/CustomInLawBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-cysecbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-cysecbert_en.md new file mode 100644 index 00000000000000..124d42903439a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-cysecbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cysecbert BertEmbeddings from markusbayer +author: John Snow Labs +name: cysecbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cysecbert` is an English model originally trained by markusbayer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cysecbert_en_5.1.1_3.0_1694559903370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cysecbert_en_5.1.1_3.0_1694559903370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cysecbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cysecbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cysecbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/markusbayer/CySecBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dal_bert_finetuned_address_v1_en.md b/docs/_posts/ahmedlone127/2023-09-12-dal_bert_finetuned_address_v1_en.md new file mode 100644 index 00000000000000..7352a07970a0f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dal_bert_finetuned_address_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dal_bert_finetuned_address_v1 BertEmbeddings from IRI2070 +author: John Snow Labs +name: dal_bert_finetuned_address_v1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dal_bert_finetuned_address_v1` is an English model originally trained by IRI2070. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dal_bert_finetuned_address_v1_en_5.1.1_3.0_1694554981931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dal_bert_finetuned_address_v1_en_5.1.1_3.0_1694554981931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dal_bert_finetuned_address_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dal_bert_finetuned_address_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dal_bert_finetuned_address_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|432.1 MB| + +## References + +https://huggingface.co/IRI2070/dal-bert-finetuned-address-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-deberta_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-deberta_base_uncased_en.md new file mode 100644 index 00000000000000..44870658d01905 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-deberta_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English deberta_base_uncased BertEmbeddings from mlcorelib +author: John Snow Labs +name: deberta_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deberta_base_uncased` is an English model originally trained by mlcorelib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_base_uncased_en_5.1.1_3.0_1694554096071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_base_uncased_en_5.1.1_3.0_1694554096071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("deberta_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("deberta_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/mlcorelib/deberta-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-divehi_labse_dv.md b/docs/_posts/ahmedlone127/2023-09-12-divehi_labse_dv.md new file mode 100644 index 00000000000000..7019823ece87a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-divehi_labse_dv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dhivehi, Divehi, Maldivian divehi_labse BertEmbeddings from monsoon-nlp +author: John Snow Labs +name: divehi_labse +date: 2023-09-12 +tags: [bert, dv, open_source, fill_mask, onnx] +task: Embeddings +language: dv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `divehi_labse` is a Dhivehi, Divehi, Maldivian model originally trained by monsoon-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/divehi_labse_dv_5.1.1_3.0_1694555287990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/divehi_labse_dv_5.1.1_3.0_1694555287990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("divehi_labse","dv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("divehi_labse", "dv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|divehi_labse| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|dv| +|Size:|1.9 GB| + +## References + +https://huggingface.co/monsoon-nlp/dv-labse \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-divehi_muril_dv.md b/docs/_posts/ahmedlone127/2023-09-12-divehi_muril_dv.md new file mode 100644 index 00000000000000..7b13bb38cf35a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-divehi_muril_dv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dhivehi, Divehi, Maldivian divehi_muril BertEmbeddings from monsoon-nlp +author: John Snow Labs +name: divehi_muril +date: 2023-09-12 +tags: [bert, dv, open_source, fill_mask, onnx] +task: Embeddings +language: dv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `divehi_muril` is a Dhivehi, Divehi, Maldivian model originally trained by monsoon-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/divehi_muril_dv_5.1.1_3.0_1694555552470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/divehi_muril_dv_5.1.1_3.0_1694555552470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("divehi_muril","dv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("divehi_muril", "dv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|divehi_muril| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|dv| +|Size:|919.1 MB| + +## References + +https://huggingface.co/monsoon-nlp/dv-muril \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_en.md new file mode 100644 index 00000000000000..3d73ff9eaffdb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm BertEmbeddings from bayartsogt +author: John Snow Labs +name: dlub_2022_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm` is an English model originally trained by bayartsogt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_en_5.1.1_3.0_1694555892646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_en_5.1.1_3.0_1694555892646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/bayartsogt/dlub-2022-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_gansukh_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_gansukh_en.md new file mode 100644 index 00000000000000..c1b12f7c896339 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_gansukh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm_full_gansukh BertEmbeddings from Gansukh +author: John Snow Labs +name: dlub_2022_mlm_full_gansukh +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm_full_gansukh` is an English model originally trained by Gansukh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_gansukh_en_5.1.1_3.0_1694555350358.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_gansukh_en_5.1.1_3.0_1694555350358.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm_full_gansukh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm_full_gansukh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm_full_gansukh| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/Gansukh/dlub-2022-mlm-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_ganzorig_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_ganzorig_en.md new file mode 100644 index 00000000000000..8b063a7d5ded53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_ganzorig_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm_full_ganzorig BertEmbeddings from ganzorig +author: John Snow Labs +name: dlub_2022_mlm_full_ganzorig +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm_full_ganzorig` is an English model originally trained by ganzorig. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_ganzorig_en_5.1.1_3.0_1694555674512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_ganzorig_en_5.1.1_3.0_1694555674512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm_full_ganzorig","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm_full_ganzorig", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm_full_ganzorig| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/ganzorig/dlub-2022-mlm-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_omunkhuush_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_omunkhuush_en.md new file mode 100644 index 00000000000000..9085341fd867d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_omunkhuush_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm_full_omunkhuush BertEmbeddings from omunkhuush +author: John Snow Labs +name: dlub_2022_mlm_full_omunkhuush +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm_full_omunkhuush` is an English model originally trained by omunkhuush. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_omunkhuush_en_5.1.1_3.0_1694555506851.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_omunkhuush_en_5.1.1_3.0_1694555506851.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm_full_omunkhuush","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm_full_omunkhuush", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm_full_omunkhuush| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/omunkhuush/dlub-2022-mlm-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dmis_bertpubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-dmis_bertpubmed_en.md new file mode 100644 index 00000000000000..d08f5b31244e6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dmis_bertpubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dmis_bertpubmed BertEmbeddings from abnuel +author: John Snow Labs +name: dmis_bertpubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dmis_bertpubmed` is an English model originally trained by abnuel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dmis_bertpubmed_en_5.1.1_3.0_1694561812368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dmis_bertpubmed_en_5.1.1_3.0_1694561812368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dmis_bertpubmed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dmis_bertpubmed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dmis_bertpubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/abnuel/dmis_bertpubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dpr_passage_uned_en.md b/docs/_posts/ahmedlone127/2023-09-12-dpr_passage_uned_en.md new file mode 100644 index 00000000000000..e7ce09605c4ebe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dpr_passage_uned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_passage_uned BertEmbeddings from avacaondata +author: John Snow Labs +name: dpr_passage_uned +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_passage_uned` is an English model originally trained by avacaondata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_passage_uned_en_5.1.1_3.0_1694554318636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_passage_uned_en_5.1.1_3.0_1694554318636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_passage_uned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_passage_uned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_passage_uned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/avacaondata/dpr-passage-uned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dpr_query_uned_en.md b/docs/_posts/ahmedlone127/2023-09-12-dpr_query_uned_en.md new file mode 100644 index 00000000000000..1e3272bd6f9bcd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dpr_query_uned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_query_uned BertEmbeddings from avacaondata +author: John Snow Labs +name: dpr_query_uned +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_query_uned` is an English model originally trained by avacaondata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_query_uned_en_5.1.1_3.0_1694554164070.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_query_uned_en_5.1.1_3.0_1694554164070.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_query_uned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_query_uned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_query_uned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/avacaondata/dpr-query-uned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-drbert_4gb_cp_pubmedbert_fr.md b/docs/_posts/ahmedlone127/2023-09-12-drbert_4gb_cp_pubmedbert_fr.md new file mode 100644 index 00000000000000..50d27a903f93f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-drbert_4gb_cp_pubmedbert_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French drbert_4gb_cp_pubmedbert BertEmbeddings from Dr-BERT +author: John Snow Labs +name: drbert_4gb_cp_pubmedbert +date: 2023-09-12 +tags: [bert, fr, open_source, fill_mask, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `drbert_4gb_cp_pubmedbert` is a French model originally trained by Dr-BERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/drbert_4gb_cp_pubmedbert_fr_5.1.1_3.0_1694550608539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/drbert_4gb_cp_pubmedbert_fr_5.1.1_3.0_1694550608539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("drbert_4gb_cp_pubmedbert","fr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("drbert_4gb_cp_pubmedbert", "fr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|drbert_4gb_cp_pubmedbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|408.2 MB| + +## References + +https://huggingface.co/Dr-BERT/DrBERT-4GB-CP-PubMedBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-drclips_en.md b/docs/_posts/ahmedlone127/2023-09-12-drclips_en.md new file mode 100644 index 00000000000000..4a0d7ba74ff95d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-drclips_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English drclips BertEmbeddings from maximedb +author: John Snow Labs +name: drclips +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `drclips` is an English model originally trained by maximedb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/drclips_en_5.1.1_3.0_1694552359667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/drclips_en_5.1.1_3.0_1694552359667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("drclips","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("drclips", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|drclips| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/maximedb/drclips \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dummy_model_arthuerwang_en.md b/docs/_posts/ahmedlone127/2023-09-12-dummy_model_arthuerwang_en.md new file mode 100644 index 00000000000000..e21a27e6031c61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dummy_model_arthuerwang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_arthuerwang BertEmbeddings from Arthuerwang +author: John Snow Labs +name: dummy_model_arthuerwang +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_arthuerwang` is an English model originally trained by Arthuerwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_arthuerwang_en_5.1.1_3.0_1694509846009.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_arthuerwang_en_5.1.1_3.0_1694509846009.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_arthuerwang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_arthuerwang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_arthuerwang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Arthuerwang/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-europarl_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-europarl_mlm_en.md new file mode 100644 index 00000000000000..aba29d2d6dfab6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-europarl_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English europarl_mlm BertEmbeddings from lukabor +author: John Snow Labs +name: europarl_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `europarl_mlm` is an English model originally trained by lukabor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/europarl_mlm_en_5.1.1_3.0_1694550101034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/europarl_mlm_en_5.1.1_3.0_1694550101034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("europarl_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("europarl_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|europarl_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/lukabor/europarl-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-finest_bert_fi.md b/docs/_posts/ahmedlone127/2023-09-12-finest_bert_fi.md new file mode 100644 index 00000000000000..2a96b2bf931a91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-finest_bert_fi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Finnish finest_bert BertEmbeddings from EMBEDDIA +author: John Snow Labs +name: finest_bert +date: 2023-09-12 +tags: [bert, fi, open_source, fill_mask, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finest_bert` is a Finnish model originally trained by EMBEDDIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finest_bert_fi_5.1.1_3.0_1694509453012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finest_bert_fi_5.1.1_3.0_1694509453012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finest_bert","fi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finest_bert", "fi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finest_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fi| +|Size:|535.1 MB| + +## References + +https://huggingface.co/EMBEDDIA/finest-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-finetuned_bert_base_multilingual_cased_noisy_english_malay_xx.md b/docs/_posts/ahmedlone127/2023-09-12-finetuned_bert_base_multilingual_cased_noisy_english_malay_xx.md new file mode 100644 index 00000000000000..e2d475104f3bde --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-finetuned_bert_base_multilingual_cased_noisy_english_malay_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual finetuned_bert_base_multilingual_cased_noisy_english_malay BertEmbeddings from mesolitica +author: John Snow Labs +name: finetuned_bert_base_multilingual_cased_noisy_english_malay +date: 2023-09-12 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finetuned_bert_base_multilingual_cased_noisy_english_malay` is a Multilingual model originally trained by mesolitica. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_bert_base_multilingual_cased_noisy_english_malay_xx_5.1.1_3.0_1694556066679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_bert_base_multilingual_cased_noisy_english_malay_xx_5.1.1_3.0_1694556066679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finetuned_bert_base_multilingual_cased_noisy_english_malay","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finetuned_bert_base_multilingual_cased_noisy_english_malay", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_bert_base_multilingual_cased_noisy_english_malay| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|557.7 MB| + +## References + +https://huggingface.co/mesolitica/finetuned-bert-base-multilingual-cased-noisy-en-ms \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-firmanbrilianbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-firmanbrilianbert_en.md new file mode 100644 index 00000000000000..67e9989d6439a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-firmanbrilianbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English firmanbrilianbert BertEmbeddings from FirmanBr +author: John Snow Labs +name: firmanbrilianbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `firmanbrilianbert` is an English model originally trained by FirmanBr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/firmanbrilianbert_en_5.1.1_3.0_1694548253279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/firmanbrilianbert_en_5.1.1_3.0_1694548253279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("firmanbrilianbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("firmanbrilianbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|firmanbrilianbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/FirmanBr/FirmanBrilianBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-flang_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-flang_bert_en.md new file mode 100644 index 00000000000000..d804ac64ea7551 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-flang_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English flang_bert BertEmbeddings from SALT-NLP +author: John Snow Labs +name: flang_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `flang_bert` is an English model originally trained by SALT-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flang_bert_en_5.1.1_3.0_1694553487024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flang_bert_en_5.1.1_3.0_1694553487024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("flang_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("flang_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flang_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/SALT-NLP/FLANG-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-flang_spanbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-flang_spanbert_en.md new file mode 100644 index 00000000000000..b79cb06b88f94a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-flang_spanbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English flang_spanbert BertEmbeddings from SALT-NLP +author: John Snow Labs +name: flang_spanbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `flang_spanbert` is an English model originally trained by SALT-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flang_spanbert_en_5.1.1_3.0_1694553636224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flang_spanbert_en_5.1.1_3.0_1694553636224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("flang_spanbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("flang_spanbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flang_spanbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/SALT-NLP/FLANG-SpanBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-gbert_large_en.md b/docs/_posts/ahmedlone127/2023-09-12-gbert_large_en.md new file mode 100644 index 00000000000000..ef130a63b743b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-gbert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gbert_large BertEmbeddings from Anjoe +author: John Snow Labs +name: gbert_large +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gbert_large` is an English model originally trained by Anjoe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gbert_large_en_5.1.1_3.0_1694548898842.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gbert_large_en_5.1.1_3.0_1694548898842.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gbert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gbert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gbert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Anjoe/gbert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hinglish_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-hinglish_bert_en.md new file mode 100644 index 00000000000000..5701ea912237f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hinglish_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_bert BertEmbeddings from meghanabhange +author: John Snow Labs +name: hinglish_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_bert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_bert_en_5.1.1_3.0_1694552706521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_bert_en_5.1.1_3.0_1694552706521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hinglish_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hinglish_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/meghanabhange/Hinglish-Bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hinglish_sbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sbert_en.md new file mode 100644 index 00000000000000..7cf1830542349a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_sbert BertEmbeddings from meghanabhange +author: John Snow Labs +name: hinglish_sbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_sbert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_sbert_en_5.1.1_3.0_1694552855686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_sbert_en_5.1.1_3.0_1694552855686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hinglish_sbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hinglish_sbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_sbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/meghanabhange/hinglish-sbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hinglish_sentence_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sentence_bert_en.md new file mode 100644 index 00000000000000..f4fd57510a2475 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sentence_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_sentence_bert BertEmbeddings from meghanabhange +author: John Snow Labs +name: hinglish_sentence_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_sentence_bert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_sentence_bert_en_5.1.1_3.0_1694553037722.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_sentence_bert_en_5.1.1_3.0_1694553037722.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hinglish_sentence_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hinglish_sentence_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_sentence_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/meghanabhange/hinglish-sentence-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hm_model001_en.md b/docs/_posts/ahmedlone127/2023-09-12-hm_model001_en.md new file mode 100644 index 00000000000000..cb307dedd6ecb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hm_model001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hm_model001 BertEmbeddings from FAN-L +author: John Snow Labs +name: hm_model001 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hm_model001` is an English model originally trained by FAN-L. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hm_model001_en_5.1.1_3.0_1694547885453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hm_model001_en_5.1.1_3.0_1694547885453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hm_model001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hm_model001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hm_model001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/FAN-L/HM_model001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hubertkl_en.md b/docs/_posts/ahmedlone127/2023-09-12-hubertkl_en.md new file mode 100644 index 00000000000000..19ba053bf5913d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hubertkl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hubertkl BertEmbeddings from SzegedAI +author: John Snow Labs +name: hubertkl +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hubertkl` is an English model originally trained by SzegedAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubertkl_en_5.1.1_3.0_1694550469948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubertkl_en_5.1.1_3.0_1694550469948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hubertkl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hubertkl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubertkl| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/SzegedAI/HuBERTkl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hubertmlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-hubertmlm_en.md new file mode 100644 index 00000000000000..f6acb129c6dfad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hubertmlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hubertmlm BertEmbeddings from SzegedAI +author: John Snow Labs +name: hubertmlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hubertmlm` is an English model originally trained by SzegedAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubertmlm_en_5.1.1_3.0_1694550296332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubertmlm_en_5.1.1_3.0_1694550296332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hubertmlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hubertmlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubertmlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/SzegedAI/HuBERTmlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-indojave_codemixed_indobert_base_id.md b/docs/_posts/ahmedlone127/2023-09-12-indojave_codemixed_indobert_base_id.md new file mode 100644 index 00000000000000..9ca84832e77dc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-indojave_codemixed_indobert_base_id.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Indonesian indojave_codemixed_indobert_base BertEmbeddings from fathan +author: John Snow Labs +name: indojave_codemixed_indobert_base +date: 2023-09-12 +tags: [bert, id, open_source, fill_mask, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indojave_codemixed_indobert_base` is an Indonesian model originally trained by fathan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indojave_codemixed_indobert_base_id_5.1.1_3.0_1694549511841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indojave_codemixed_indobert_base_id_5.1.1_3.0_1694549511841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indojave_codemixed_indobert_base","id") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indojave_codemixed_indobert_base", "id") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indojave_codemixed_indobert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|411.5 MB| + +## References + +https://huggingface.co/fathan/indojave-codemixed-indobert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-jobbert_german_de.md b/docs/_posts/ahmedlone127/2023-09-12-jobbert_german_de.md new file mode 100644 index 00000000000000..98ccda4cbfe005 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-jobbert_german_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German jobbert_german BertEmbeddings from agne +author: John Snow Labs +name: jobbert_german +date: 2023-09-12 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jobbert_german` is a German model originally trained by agne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jobbert_german_de_5.1.1_3.0_1694508954522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jobbert_german_de_5.1.1_3.0_1694508954522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("jobbert_german","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("jobbert_german", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jobbert_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/agne/jobBERT-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-jobgbert_de.md b/docs/_posts/ahmedlone127/2023-09-12-jobgbert_de.md new file mode 100644 index 00000000000000..5bcbf8219d5633 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-jobgbert_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German jobgbert BertEmbeddings from agne +author: John Snow Labs +name: jobgbert +date: 2023-09-12 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jobgbert` is a German model originally trained by agne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jobgbert_de_5.1.1_3.0_1694509133987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jobgbert_de_5.1.1_3.0_1694509133987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("jobgbert","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("jobgbert", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jobgbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|409.5 MB| + +## References + +https://huggingface.co/agne/jobGBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-jurisbert_base_portuguese_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-jurisbert_base_portuguese_uncased_en.md new file mode 100644 index 00000000000000..b30d0d72a3aeb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-jurisbert_base_portuguese_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English jurisbert_base_portuguese_uncased BertEmbeddings from alfaneo +author: John Snow Labs +name: jurisbert_base_portuguese_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jurisbert_base_portuguese_uncased` is an English model originally trained by alfaneo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jurisbert_base_portuguese_uncased_en_5.1.1_3.0_1694556436181.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jurisbert_base_portuguese_uncased_en_5.1.1_3.0_1694556436181.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("jurisbert_base_portuguese_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("jurisbert_base_portuguese_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jurisbert_base_portuguese_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/alfaneo/jurisbert-base-portuguese-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-k_12bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-k_12bert_en.md new file mode 100644 index 00000000000000..f9372b7d4b0227 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-k_12bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English k_12bert BertEmbeddings from vasugoel +author: John Snow Labs +name: k_12bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `k_12bert` is an English model originally trained by vasugoel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/k_12bert_en_5.1.1_3.0_1694557848064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/k_12bert_en_5.1.1_3.0_1694557848064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("k_12bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("k_12bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|k_12bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/vasugoel/K-12BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kbbert_distilled_cased_sv.md b/docs/_posts/ahmedlone127/2023-09-12-kbbert_distilled_cased_sv.md new file mode 100644 index 00000000000000..a9b5c5b7c321c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kbbert_distilled_cased_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish kbbert_distilled_cased BertEmbeddings from Addedk +author: John Snow Labs +name: kbbert_distilled_cased +date: 2023-09-12 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kbbert_distilled_cased` is a Swedish model originally trained by Addedk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kbbert_distilled_cased_sv_5.1.1_3.0_1694550965718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kbbert_distilled_cased_sv_5.1.1_3.0_1694550965718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kbbert_distilled_cased","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kbbert_distilled_cased", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kbbert_distilled_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|305.8 MB| + +## References + +https://huggingface.co/Addedk/kbbert-distilled-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_1e_en.md new file mode 100644 index 00000000000000..66ed603ae401b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkmultbert_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkmultbert_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkmultbert_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkmultbert_1e_en_5.1.1_3.0_1694553093201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkmultbert_1e_en_5.1.1_3.0_1694553093201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kkmultbert_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkmultbert_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkmultbert_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.3 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKMultBert_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_3e_en.md new file mode 100644 index 00000000000000..b4eed81ec9c53b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkmultbert_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkmultbert_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkmultbert_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkmultbert_3e_en_5.1.1_3.0_1694553624770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkmultbert_3e_en_5.1.1_3.0_1694553624770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kkmultbert_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkmultbert_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkmultbert_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKMultBert_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_1e_en.md new file mode 100644 index 00000000000000..5ef92c1e47d453 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkturkbert_bynaumen_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkturkbert_bynaumen_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkturkbert_bynaumen_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_1e_en_5.1.1_3.0_1694559139710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_1e_en_5.1.1_3.0_1694559139710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kkturkbert_bynaumen_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkturkbert_bynaumen_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkturkbert_bynaumen_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.0 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKTurkBert_byNAUMEN_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_2e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_2e_en.md new file mode 100644 index 00000000000000..79fbc1e527c3e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_2e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkturkbert_bynaumen_2e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkturkbert_bynaumen_2e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkturkbert_bynaumen_2e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_2e_en_5.1.1_3.0_1694558924867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_2e_en_5.1.1_3.0_1694558924867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kkturkbert_bynaumen_2e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkturkbert_bynaumen_2e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkturkbert_bynaumen_2e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.6 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKTurkBert_byNAUMEN_2e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_3e_en.md new file mode 100644 index 00000000000000..b6c076196dbbe0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkturkbert_bynaumen_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkturkbert_bynaumen_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkturkbert_bynaumen_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_3e_en_5.1.1_3.0_1694558723631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_3e_en_5.1.1_3.0_1694558723631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kkturkbert_bynaumen_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkturkbert_bynaumen_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkturkbert_bynaumen_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.0 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKTurkBert_byNAUMEN_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-korean_mathbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-korean_mathbert_en.md new file mode 100644 index 00000000000000..9c7ef7b8dc8b8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-korean_mathbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English korean_mathbert BertEmbeddings from jnsulee +author: John Snow Labs +name: korean_mathbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `korean_mathbert` is an English model originally trained by jnsulee. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/korean_mathbert_en_5.1.1_3.0_1694553539059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/korean_mathbert_en_5.1.1_3.0_1694553539059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("korean_mathbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("korean_mathbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|korean_mathbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/jnsulee/ko-mathbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_keyword_sentence_10000_0.0003_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_keyword_sentence_10000_0.0003_en.md new file mode 100644 index 00000000000000..eaf877612e1c35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_keyword_sentence_10000_0.0003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_keyword_sentence_10000_0.0003 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_keyword_sentence_10000_0.0003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_keyword_sentence_10000_0.0003` is an English model originally trained by enoriega. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_keyword_sentence_10000_0.0003_en_5.1.1_3.0_1694548488337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_keyword_sentence_10000_0.0003_en_5.1.1_3.0_1694548488337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kw_pubmed_keyword_sentence_10000_0.0003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_keyword_sentence_10000_0.0003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_keyword_sentence_10000_0.0003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_keyword_sentence_10000_0.0003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_en.md new file mode 100644 index 00000000000000..9eb8a8f55f5076 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_vanilla_document_10000_0.0003 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_vanilla_document_10000_0.0003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_vanilla_document_10000_0.0003` is an English model originally trained by enoriega. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_document_10000_0.0003_en_5.1.1_3.0_1694548616966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_document_10000_0.0003_en_5.1.1_3.0_1694548616966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kw_pubmed_vanilla_document_10000_0.0003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_vanilla_document_10000_0.0003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_vanilla_document_10000_0.0003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_vanilla_document_10000_0.0003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_2_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_2_en.md new file mode 100644 index 00000000000000..3a43754a499d0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_vanilla_sentence_10000_0.0003_2 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_vanilla_sentence_10000_0.0003_2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_vanilla_sentence_10000_0.0003_2` is an English model originally trained by enoriega. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_2_en_5.1.1_3.0_1694553337510.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_2_en_5.1.1_3.0_1694553337510.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kw_pubmed_vanilla_sentence_10000_0.0003_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_vanilla_sentence_10000_0.0003_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_vanilla_sentence_10000_0.0003_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_vanilla_sentence_10000_0.0003_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_en.md new file mode 100644 index 00000000000000..9406230adc3229 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_vanilla_sentence_10000_0.0003 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_vanilla_sentence_10000_0.0003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_vanilla_sentence_10000_0.0003` is an English model originally trained by enoriega. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_en_5.1.1_3.0_1694548333195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_en_5.1.1_3.0_1694548333195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kw_pubmed_vanilla_sentence_10000_0.0003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_vanilla_sentence_10000_0.0003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_vanilla_sentence_10000_0.0003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_vanilla_sentence_10000_0.0003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kykim_bert_kor_base_korean_en.md b/docs/_posts/ahmedlone127/2023-09-12-kykim_bert_kor_base_korean_en.md new file mode 100644 index 00000000000000..e391d45933b48a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kykim_bert_kor_base_korean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kykim_bert_kor_base_korean BertEmbeddings from koala +author: John Snow Labs +name: kykim_bert_kor_base_korean +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kykim_bert_kor_base_korean` is an English model originally trained by koala. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kykim_bert_kor_base_korean_en_5.1.1_3.0_1694510273861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kykim_bert_kor_base_korean_en_5.1.1_3.0_1694510273861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kykim_bert_kor_base_korean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kykim_bert_kor_base_korean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kykim_bert_kor_base_korean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/koala/kykim-bert-kor-base-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_fp_en.md b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_fp_en.md new file mode 100644 index 00000000000000..9f8486bd7aaeb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_fp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbertpt_fp BertEmbeddings from raquelsilveira +author: John Snow Labs +name: legalbertpt_fp +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbertpt_fp` is an English model originally trained by raquelsilveira. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbertpt_fp_en_5.1.1_3.0_1694549902826.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbertpt_fp_en_5.1.1_3.0_1694549902826.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legalbertpt_fp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legalbertpt_fp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbertpt_fp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.8 MB| + +## References + +https://huggingface.co/raquelsilveira/legalbertpt_fp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_sardinian_en.md b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_sardinian_en.md new file mode 100644 index 00000000000000..bfc82201ef1ea0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_sardinian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbertpt_sardinian BertEmbeddings from raquelsilveira +author: John Snow Labs +name: legalbertpt_sardinian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbertpt_sardinian` is an English model originally trained by raquelsilveira. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbertpt_sardinian_en_5.1.1_3.0_1694550099835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbertpt_sardinian_en_5.1.1_3.0_1694550099835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legalbertpt_sardinian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legalbertpt_sardinian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbertpt_sardinian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|425.1 MB| + +## References + +https://huggingface.co/raquelsilveira/legalbertpt_sc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-lernnavi_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-lernnavi_bert_en.md new file mode 100644 index 00000000000000..65ebce65f7dec0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-lernnavi_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lernnavi_bert BertEmbeddings from lucazed +author: John Snow Labs +name: lernnavi_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lernnavi_bert` is an English model originally trained by lucazed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lernnavi_bert_en_5.1.1_3.0_1694561382262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lernnavi_bert_en_5.1.1_3.0_1694561382262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lernnavi_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lernnavi_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lernnavi_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/lucazed/lernnavi_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-lf_model_01_en.md b/docs/_posts/ahmedlone127/2023-09-12-lf_model_01_en.md new file mode 100644 index 00000000000000..93273c267fa6f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-lf_model_01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lf_model_01 BertEmbeddings from lf +author: John Snow Labs +name: lf_model_01 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lf_model_01` is an English model originally trained by lf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lf_model_01_en_5.1.1_3.0_1694548430541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lf_model_01_en_5.1.1_3.0_1694548430541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lf_model_01","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lf_model_01", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lf_model_01| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lf/lf_model_01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-lm_financial_v2_en.md b/docs/_posts/ahmedlone127/2023-09-12-lm_financial_v2_en.md new file mode 100644 index 00000000000000..06a7fec6d55cc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-lm_financial_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lm_financial_v2 BertEmbeddings from anablasi +author: John Snow Labs +name: lm_financial_v2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lm_financial_v2` is an English model originally trained by anablasi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lm_financial_v2_en_5.1.1_3.0_1694551202277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lm_financial_v2_en_5.1.1_3.0_1694551202277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lm_financial_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lm_financial_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lm_financial_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/anablasi/lm_financial_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-logion_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-logion_base_en.md new file mode 100644 index 00000000000000..a492b8bb4662be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-logion_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English logion_base BertEmbeddings from cabrooks +author: John Snow Labs +name: logion_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `logion_base` is an English model originally trained by cabrooks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/logion_base_en_5.1.1_3.0_1694551525035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/logion_base_en_5.1.1_3.0_1694551525035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("logion_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("logion_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|logion_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.8 MB| + +## References + +https://huggingface.co/cabrooks/LOGION-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-m3_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-m3_mlm_en.md new file mode 100644 index 00000000000000..5eef13ec6a635a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-m3_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_mlm BertEmbeddings from S2312dal +author: John Snow Labs +name: m3_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_mlm` is an English model originally trained by S2312dal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_mlm_en_5.1.1_3.0_1694551397577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_mlm_en_5.1.1_3.0_1694551397577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("m3_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("m3_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.9 MB| + +## References + +https://huggingface.co/S2312dal/M3_MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-m6_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-m6_mlm_en.md new file mode 100644 index 00000000000000..e853cd0fe38210 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-m6_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m6_mlm BertEmbeddings from S2312dal +author: John Snow Labs +name: m6_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m6_mlm` is an English model originally trained by S2312dal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m6_mlm_en_5.1.1_3.0_1694551618334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m6_mlm_en_5.1.1_3.0_1694551618334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("m6_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("m6_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m6_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/S2312dal/M6_MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-marathi_bert_mr.md b/docs/_posts/ahmedlone127/2023-09-12-marathi_bert_mr.md new file mode 100644 index 00000000000000..feebdcd8cbd918 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-marathi_bert_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_bert +date: 2023-09-12 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_bert` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_bert_mr_5.1.1_3.0_1694547462490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_bert_mr_5.1.1_3.0_1694547462490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_bert","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marathi_bert", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|665.1 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_cased_en.md new file mode 100644 index 00000000000000..c4c0a9c117f5a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_cont_cased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_cont_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_cont_cased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_cased_en_5.1.1_3.0_1694508985703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_cased_en_5.1.1_3.0_1694508985703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("masked_2ktweets_conflibert_cont_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_cont_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_cont_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.9 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-cont-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_uncased_en.md new file mode 100644 index 00000000000000..9a37d284962cf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_cont_uncased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_cont_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_cont_uncased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_uncased_en_5.1.1_3.0_1694509108326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_uncased_en_5.1.1_3.0_1694509108326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("masked_2ktweets_conflibert_cont_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_cont_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_cont_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-cont-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_cased_en.md new file mode 100644 index 00000000000000..316c787c916fe5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_scr_cased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_scr_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_scr_cased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_cased_en_5.1.1_3.0_1694508731836.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_cased_en_5.1.1_3.0_1694508731836.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("masked_2ktweets_conflibert_scr_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_scr_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_scr_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.1 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-scr-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_uncased_en.md new file mode 100644 index 00000000000000..e007524b59d832 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_scr_uncased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_scr_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_scr_uncased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_uncased_en_5.1.1_3.0_1694508845965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_uncased_en_5.1.1_3.0_1694508845965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("masked_2ktweets_conflibert_scr_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_scr_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_scr_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-scr-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-matscibert_en.md b/docs/_posts/ahmedlone127/2023-09-12-matscibert_en.md new file mode 100644 index 00000000000000..200e1f51c320b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-matscibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English matscibert BertEmbeddings from m3rg-iitd +author: John Snow Labs +name: matscibert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `matscibert` is an English model originally trained by m3rg-iitd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/matscibert_en_5.1.1_3.0_1694550983161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/matscibert_en_5.1.1_3.0_1694550983161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("matscibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("matscibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|matscibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/m3rg-iitd/matscibert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_arabic_c19_ar.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_arabic_c19_ar.md new file mode 100644 index 00000000000000..424a213070f24f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_arabic_c19_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic mbert_arabic_c19 BertEmbeddings from moha +author: John Snow Labs +name: mbert_arabic_c19 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_arabic_c19` is an Arabic model originally trained by moha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_arabic_c19_ar_5.1.1_3.0_1694554545798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_arabic_c19_ar_5.1.1_3.0_1694554545798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_arabic_c19","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_arabic_c19", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_arabic_c19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|624.7 MB| + +## References + +https://huggingface.co/moha/mbert_ar_c19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_chinese_en.md new file mode 100644 index 00000000000000..d26011b5a91d9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_resp_english_chinese BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_resp_english_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_resp_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_resp_english_chinese_en_5.1.1_3.0_1694558912316.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_resp_english_chinese_en_5.1.1_3.0_1694558912316.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_resp_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_resp_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_resp_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.0 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_german_en.md new file mode 100644 index 00000000000000..4e6a2f0393f96e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_resp_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_resp_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_resp_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_resp_english_german_en_5.1.1_3.0_1694558476877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_resp_english_german_en_5.1.1_3.0_1694558476877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_resp_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_resp_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_resp_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.2 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_italian_en.md new file mode 100644 index 00000000000000..3152cc6cee6769 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_resp_english_italian BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_resp_english_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_resp_english_italian` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_resp_english_italian_en_5.1.1_3.0_1694558696793.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_resp_english_italian_en_5.1.1_3.0_1694558696793.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_resp_english_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_resp_english_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_resp_english_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.1 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_swedish_distilled_cased_sv.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_swedish_distilled_cased_sv.md new file mode 100644 index 00000000000000..463773b8ada81e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_swedish_distilled_cased_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish mbert_swedish_distilled_cased BertEmbeddings from Addedk +author: John Snow Labs +name: mbert_swedish_distilled_cased +date: 2023-09-12 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_swedish_distilled_cased` is a Swedish model originally trained by Addedk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_swedish_distilled_cased_sv_5.1.1_3.0_1694549731872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_swedish_distilled_cased_sv_5.1.1_3.0_1694549731872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_swedish_distilled_cased","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_swedish_distilled_cased", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_swedish_distilled_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|505.7 MB| + +## References + +https://huggingface.co/Addedk/mbert-swedish-distilled-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_chinese_en.md new file mode 100644 index 00000000000000..9f9a6aaca92bb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_chat_english_chinese BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_chat_english_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_chat_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_chinese_en_5.1.1_3.0_1694559504129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_chinese_en_5.1.1_3.0_1694559504129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_chat_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_chat_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_chat_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.0 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-chat-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_german_en.md new file mode 100644 index 00000000000000..8e156a6e394c00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_chat_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_chat_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_chat_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_german_en_5.1.1_3.0_1694559103177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_german_en_5.1.1_3.0_1694559103177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_chat_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_chat_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_chat_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.2 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-chat-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_italian_en.md new file mode 100644 index 00000000000000..5525b45002cd0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_chat_english_italian BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_chat_english_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_chat_english_italian` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_italian_en_5.1.1_3.0_1694559329950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_italian_en_5.1.1_3.0_1694559329950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_chat_english_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_chat_english_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_chat_english_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.1 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-chat-en-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_chinese_en.md new file mode 100644 index 00000000000000..c7802343bfcfa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_sent_english_chinese BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_sent_english_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_sent_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_chinese_en_5.1.1_3.0_1694560100127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_chinese_en_5.1.1_3.0_1694560100127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_sent_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_sent_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_sent_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|622.3 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-sent-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_german_en.md new file mode 100644 index 00000000000000..ae91b6ff3b9dc0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_sent_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_sent_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_sent_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_german_en_5.1.1_3.0_1694559726822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_german_en_5.1.1_3.0_1694559726822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_sent_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_sent_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_sent_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|624.8 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-sent-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_german_en.md new file mode 100644 index 00000000000000..56975392b75a88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_xdm_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_xdm_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_xdm_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_german_en_5.1.1_3.0_1694560316540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_german_en_5.1.1_3.0_1694560316540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_xdm_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_xdm_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_xdm_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.4 MB| + +## References + +https://huggingface.co/nikitam/mbert-xdm-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_italian_en.md new file mode 100644 index 00000000000000..ee756f126a4d0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_xdm_english_italian BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_xdm_english_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_xdm_english_italian` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_italian_en_5.1.1_3.0_1694560553745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_italian_en_5.1.1_3.0_1694560553745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_xdm_english_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_xdm_english_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_xdm_english_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.4 MB| + +## References + +https://huggingface.co/nikitam/mbert-xdm-en-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-miem_scibert_linguistic_en.md b/docs/_posts/ahmedlone127/2023-09-12-miem_scibert_linguistic_en.md new file mode 100644 index 00000000000000..cef5decb871541 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-miem_scibert_linguistic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English miem_scibert_linguistic BertEmbeddings from miemBertProject +author: John Snow Labs +name: miem_scibert_linguistic +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `miem_scibert_linguistic` is an English model originally trained by miemBertProject. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/miem_scibert_linguistic_en_5.1.1_3.0_1694549092975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/miem_scibert_linguistic_en_5.1.1_3.0_1694549092975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("miem_scibert_linguistic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("miem_scibert_linguistic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|miem_scibert_linguistic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|657.4 MB| + +## References + +https://huggingface.co/miemBertProject/miem-scibert-linguistic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilm_finetuned_imdb_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilm_finetuned_imdb_accelerate_en.md new file mode 100644 index 00000000000000..7fe241693778a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilm_finetuned_imdb_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilm_finetuned_imdb_accelerate BertEmbeddings from lewtun +author: John Snow Labs +name: minilm_finetuned_imdb_accelerate +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilm_finetuned_imdb_accelerate` is an English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_finetuned_imdb_accelerate_en_5.1.1_3.0_1694548276227.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_finetuned_imdb_accelerate_en_5.1.1_3.0_1694548276227.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("minilm_finetuned_imdb_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minilm_finetuned_imdb_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_finetuned_imdb_accelerate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|124.5 MB| + +## References + +https://huggingface.co/lewtun/minilm-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilm_l12_h384_uncased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilm_l12_h384_uncased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..e339d027c3b24c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilm_l12_h384_uncased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilm_l12_h384_uncased_finetuned_imdb BertEmbeddings from lewtun +author: John Snow Labs +name: minilm_l12_h384_uncased_finetuned_imdb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilm_l12_h384_uncased_finetuned_imdb` is an English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_l12_h384_uncased_finetuned_imdb_en_5.1.1_3.0_1694548053126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_l12_h384_uncased_finetuned_imdb_en_5.1.1_3.0_1694548053126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("minilm_l12_h384_uncased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minilm_l12_h384_uncased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_l12_h384_uncased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|124.5 MB| + +## References + +https://huggingface.co/lewtun/MiniLM-L12-H384-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilm_l_12_stackoverflow_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilm_l_12_stackoverflow_en.md new file mode 100644 index 00000000000000..14aec5f9c15ffb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilm_l_12_stackoverflow_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilm_l_12_stackoverflow BertEmbeddings from M-Chimiste +author: John Snow Labs +name: minilm_l_12_stackoverflow +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilm_l_12_stackoverflow` is an English model originally trained by M-Chimiste. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_l_12_stackoverflow_en_5.1.1_3.0_1694552531487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_l_12_stackoverflow_en_5.1.1_3.0_1694552531487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("minilm_l_12_stackoverflow","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minilm_l_12_stackoverflow", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_l_12_stackoverflow| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|124.7 MB| + +## References + +https://huggingface.co/M-Chimiste/MiniLM-L-12-StackOverflow \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_base_en.md new file mode 100644 index 00000000000000..48068504c90f0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilmv2_l6_h384_distilled_from_bert_base BertEmbeddings from nreimers +author: John Snow Labs +name: minilmv2_l6_h384_distilled_from_bert_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilmv2_l6_h384_distilled_from_bert_base` is an English model originally trained by nreimers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_base_en_5.1.1_3.0_1694562156137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_base_en_5.1.1_3.0_1694562156137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("minilmv2_l6_h384_distilled_from_bert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minilmv2_l6_h384_distilled_from_bert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h384_distilled_from_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|54.1 MB| + +## References + +https://huggingface.co/nreimers/MiniLMv2-L6-H384-distilled-from-BERT-Base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_large_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_large_en.md new file mode 100644 index 00000000000000..d7dbada6e7d0b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilmv2_l6_h384_distilled_from_bert_large BertEmbeddings from nreimers +author: John Snow Labs +name: minilmv2_l6_h384_distilled_from_bert_large +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilmv2_l6_h384_distilled_from_bert_large` is an English model originally trained by nreimers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_large_en_5.1.1_3.0_1694562262970.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_large_en_5.1.1_3.0_1694562262970.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("minilmv2_l6_h384_distilled_from_bert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minilmv2_l6_h384_distilled_from_bert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h384_distilled_from_bert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|54.2 MB| + +## References + +https://huggingface.co/nreimers/MiniLMv2-L6-H384-distilled-from-BERT-Large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_base_en.md new file mode 100644 index 00000000000000..47347c85f09f3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilmv2_l6_h768_distilled_from_bert_base BertEmbeddings from nreimers +author: John Snow Labs +name: minilmv2_l6_h768_distilled_from_bert_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilmv2_l6_h768_distilled_from_bert_base` is an English model originally trained by nreimers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h768_distilled_from_bert_base_en_5.1.1_3.0_1694562396390.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h768_distilled_from_bert_base_en_5.1.1_3.0_1694562396390.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("minilmv2_l6_h768_distilled_from_bert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minilmv2_l6_h768_distilled_from_bert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h768_distilled_from_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|158.7 MB| + +## References + +https://huggingface.co/nreimers/MiniLMv2-L6-H768-distilled-from-BERT-Base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mizbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-mizbert_en.md new file mode 100644 index 00000000000000..6892e5120e8096 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mizbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mizbert BertEmbeddings from robzchhangte +author: John Snow Labs +name: mizbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mizbert` is an English model originally trained by robzchhangte. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mizbert_en_5.1.1_3.0_1694556095325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mizbert_en_5.1.1_3.0_1694556095325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mizbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mizbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mizbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/robzchhangte/MizBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p1_combined_001_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p1_combined_001_en.md new file mode 100644 index 00000000000000..7696cc278c79aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p1_combined_001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230503_indobert_base_p1_combined_001 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230503_indobert_base_p1_combined_001 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230503_indobert_base_p1_combined_001` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_base_p1_combined_001_en_5.1.1_3.0_1694551387527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_base_p1_combined_001_en_5.1.1_3.0_1694551387527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230503_indobert_base_p1_combined_001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230503_indobert_base_p1_combined_001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230503_indobert_base_p1_combined_001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.3 MB| + +## References + +https://huggingface.co/intanm/mlm-20230503-indobert-base-p1-combined-001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_large_p1_001_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_large_p1_001_en.md new file mode 100644 index 00000000000000..b805b7b8824326 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_large_p1_001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230503_indobert_large_p1_001 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230503_indobert_large_p1_001 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230503_indobert_large_p1_001` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_large_p1_001_en_5.1.1_3.0_1694550821233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_large_p1_001_en_5.1.1_3.0_1694550821233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230503_indobert_large_p1_001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230503_indobert_large_p1_001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230503_indobert_large_p1_001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230503-indobert-large-p1-001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230510_indobert_large_p1_001_pt2_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230510_indobert_large_p1_001_pt2_en.md new file mode 100644 index 00000000000000..18a543b38a09f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230510_indobert_large_p1_001_pt2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230510_indobert_large_p1_001_pt2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230510_indobert_large_p1_001_pt2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230510_indobert_large_p1_001_pt2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230510_indobert_large_p1_001_pt2_en_5.1.1_3.0_1694557161327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230510_indobert_large_p1_001_pt2_en_5.1.1_3.0_1694557161327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230510_indobert_large_p1_001_pt2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230510_indobert_large_p1_001_pt2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230510_indobert_large_p1_001_pt2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230510-indobert-large-p1-001-pt2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230511_indobert_large_p1_combined_pt1_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230511_indobert_large_p1_combined_pt1_en.md new file mode 100644 index 00000000000000..9dbe37041d55f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230511_indobert_large_p1_combined_pt1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230511_indobert_large_p1_combined_pt1 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230511_indobert_large_p1_combined_pt1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230511_indobert_large_p1_combined_pt1` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230511_indobert_large_p1_combined_pt1_en_5.1.1_3.0_1694557729072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230511_indobert_large_p1_combined_pt1_en_5.1.1_3.0_1694557729072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230511_indobert_large_p1_combined_pt1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230511_indobert_large_p1_combined_pt1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230511_indobert_large_p1_combined_pt1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230511-indobert-large-p1-combined-pt1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_combined_pt2_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_combined_pt2_en.md new file mode 100644 index 00000000000000..831aff4aaf1d0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_combined_pt2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230513_indobert_large_p1_combined_pt2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230513_indobert_large_p1_combined_pt2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230513_indobert_large_p1_combined_pt2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_combined_pt2_en_5.1.1_3.0_1694560283117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_combined_pt2_en_5.1.1_3.0_1694560283117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230513_indobert_large_p1_combined_pt2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230513_indobert_large_p1_combined_pt2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230513_indobert_large_p1_combined_pt2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230513-indobert-large-p1-combined-pt2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-model202109_en.md b/docs/_posts/ahmedlone127/2023-09-12-model202109_en.md new file mode 100644 index 00000000000000..4cf88950d8266d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-model202109_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model202109 BertEmbeddings from lyx10290516 +author: John Snow Labs +name: model202109 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model202109` is an English model originally trained by lyx10290516. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model202109_en_5.1.1_3.0_1694550495262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model202109_en_5.1.1_3.0_1694550495262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model202109","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model202109", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model202109| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lyx10290516/model202109 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mybert_en.md b/docs/_posts/ahmedlone127/2023-09-12-mybert_en.md new file mode 100644 index 00000000000000..93e3eacbe46f8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mybert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mybert BertEmbeddings from RavenK +author: John Snow Labs +name: mybert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mybert` is an English model originally trained by RavenK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mybert_en_5.1.1_3.0_1694548245920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mybert_en_5.1.1_3.0_1694548245920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mybert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mybert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mybert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/RavenK/mybert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mymodel007_lbh020300_en.md b/docs/_posts/ahmedlone127/2023-09-12-mymodel007_lbh020300_en.md new file mode 100644 index 00000000000000..633fab71a48ce9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mymodel007_lbh020300_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel007_lbh020300 BertEmbeddings from lbh020300 +author: John Snow Labs +name: mymodel007_lbh020300 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mymodel007_lbh020300` is an English model originally trained by lbh020300. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel007_lbh020300_en_5.1.1_3.0_1694547914311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel007_lbh020300_en_5.1.1_3.0_1694547914311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel007_lbh020300","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel007_lbh020300", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel007_lbh020300| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lbh020300/mymodel007 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-myrubert_tiny2_en.md b/docs/_posts/ahmedlone127/2023-09-12-myrubert_tiny2_en.md new file mode 100644 index 00000000000000..a4f1b599f7653c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-myrubert_tiny2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English myrubert_tiny2 BertEmbeddings from nlp-testing +author: John Snow Labs +name: myrubert_tiny2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `myrubert_tiny2` is an English model originally trained by nlp-testing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/myrubert_tiny2_en_5.1.1_3.0_1694555345139.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/myrubert_tiny2_en_5.1.1_3.0_1694555345139.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("myrubert_tiny2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("myrubert_tiny2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|myrubert_tiny2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|109.0 MB| + +## References + +https://huggingface.co/nlp-testing/myrubert-tiny2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-nepal_bhasa_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-nepal_bhasa_bert_en.md new file mode 100644 index 00000000000000..3595373bc137f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-nepal_bhasa_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nepal_bhasa_bert BertEmbeddings from onlydj96 +author: John Snow Labs +name: nepal_bhasa_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nepal_bhasa_bert` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_en_5.1.1_3.0_1694549877596.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_en_5.1.1_3.0_1694549877596.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nepal_bhasa_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepal_bhasa_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.1 MB| + +## References + +https://huggingface.co/onlydj96/new_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-nontoxiccivilbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-nontoxiccivilbert_en.md new file mode 100644 index 00000000000000..1b6223c22ac5e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-nontoxiccivilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nontoxiccivilbert BertEmbeddings from Ashokajou51 +author: John Snow Labs +name: nontoxiccivilbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nontoxiccivilbert` is an English model originally trained by Ashokajou51. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nontoxiccivilbert_en_5.1.1_3.0_1694558265623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nontoxiccivilbert_en_5.1.1_3.0_1694558265623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nontoxiccivilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nontoxiccivilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nontoxiccivilbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Ashokajou51/NonToxicCivilBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-norbert2_no.md b/docs/_posts/ahmedlone127/2023-09-12-norbert2_no.md new file mode 100644 index 00000000000000..213b1d443ca767 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-norbert2_no.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Norwegian norbert2 BertEmbeddings from ltg +author: John Snow Labs +name: norbert2 +date: 2023-09-12 +tags: [bert, "no", open_source, fill_mask, onnx] +task: Embeddings +language: "no" +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert2` is a Norwegian model originally trained by ltg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_no_5.1.1_3.0_1694549723258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_no_5.1.1_3.0_1694549723258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("norbert2","no") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("norbert2", "no") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|no| +|Size:|465.2 MB| + +## References + +https://huggingface.co/ltg/norbert2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-norbert_no.md b/docs/_posts/ahmedlone127/2023-09-12-norbert_no.md new file mode 100644 index 00000000000000..a4df72e7efb168 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-norbert_no.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Norwegian norbert BertEmbeddings from ltg +author: John Snow Labs +name: norbert +date: 2023-09-12 +tags: [bert, "no", open_source, fill_mask, onnx] +task: Embeddings +language: "no" +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert` is a Norwegian model originally trained by ltg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert_no_5.1.1_3.0_1694549544686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert_no_5.1.1_3.0_1694549544686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("norbert","no") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("norbert", "no") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|no| +|Size:|415.1 MB| + +## References + +https://huggingface.co/ltg/norbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_bert_en.md new file mode 100644 index 00000000000000..3232ea9752d74a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English original_topic_sports_bert BertEmbeddings from Kdogs +author: John Snow Labs +name: original_topic_sports_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `original_topic_sports_bert` is an English model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/original_topic_sports_bert_en_5.1.1_3.0_1694560482579.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/original_topic_sports_bert_en_5.1.1_3.0_1694560482579.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("original_topic_sports_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("original_topic_sports_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|original_topic_sports_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/Kdogs/original_topic-sports_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-pharmbert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_cased_en.md new file mode 100644 index 00000000000000..ed24d3598bf5d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pharmbert_cased BertEmbeddings from Lianglab +author: John Snow Labs +name: pharmbert_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pharmbert_cased` is an English model originally trained by Lianglab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pharmbert_cased_en_5.1.1_3.0_1694557329356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pharmbert_cased_en_5.1.1_3.0_1694557329356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("pharmbert_cased","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("pharmbert_cased", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pharmbert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/Lianglab/PharmBERT-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-pharmbert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_uncased_en.md new file mode 100644 index 00000000000000..5f2cb33bf1ba7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pharmbert_uncased BertEmbeddings from Lianglab +author: John Snow Labs +name: pharmbert_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pharmbert_uncased` is an English model originally trained by Lianglab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pharmbert_uncased_en_5.1.1_3.0_1694557457237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pharmbert_uncased_en_5.1.1_3.0_1694557457237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("pharmbert_uncased","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("pharmbert_uncased", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pharmbert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Lianglab/PharmBERT-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-politbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-politbert_en.md new file mode 100644 index 00000000000000..195e729b9ccd71 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-politbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English politbert BertEmbeddings from maurice +author: John Snow Labs +name: politbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `politbert` is an English model originally trained by maurice. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/politbert_en_5.1.1_3.0_1694552201401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/politbert_en_5.1.1_3.0_1694552201401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("politbert","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("politbert", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|politbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/maurice/PolitBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-politibeto_es.md b/docs/_posts/ahmedlone127/2023-09-12-politibeto_es.md new file mode 100644 index 00000000000000..bc5c7353660b4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-politibeto_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish politibeto BertEmbeddings from nlp-cimat +author: John Snow Labs +name: politibeto +date: 2023-09-12 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `politibeto` is a Castilian, Spanish model originally trained by nlp-cimat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/politibeto_es_5.1.1_3.0_1694552289989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/politibeto_es_5.1.1_3.0_1694552289989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("politibeto","es") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("politibeto", "es")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|politibeto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.6 MB| + +## References + +https://huggingface.co/nlp-cimat/politibeto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-practice00_en.md b/docs/_posts/ahmedlone127/2023-09-12-practice00_en.md new file mode 100644 index 00000000000000..f9d0a6824d6643 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-practice00_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English practice00 BertEmbeddings from maroo93 +author: John Snow Labs +name: practice00 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `practice00` is an English model originally trained by maroo93. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/practice00_en_5.1.1_3.0_1694551890248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/practice00_en_5.1.1_3.0_1694551890248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("practice00","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("practice00", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|practice00| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/maroo93/practice00 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-practice01_en.md b/docs/_posts/ahmedlone127/2023-09-12-practice01_en.md new file mode 100644 index 00000000000000..5f9e32a79436c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-practice01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English practice01 BertEmbeddings from maroo93 +author: John Snow Labs +name: practice01 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `practice01` is an English model originally trained by maroo93. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/practice01_en_5.1.1_3.0_1694552050711.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/practice01_en_5.1.1_3.0_1694552050711.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("practice01","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("practice01", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|practice01| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/maroo93/practice01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-prop_wiki_en.md b/docs/_posts/ahmedlone127/2023-09-12-prop_wiki_en.md new file mode 100644 index 00000000000000..aeb8ae5ad638cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-prop_wiki_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English prop_wiki BertEmbeddings from xyma +author: John Snow Labs +name: prop_wiki +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `prop_wiki` is an English model originally trained by xyma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prop_wiki_en_5.1.1_3.0_1694559423925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prop_wiki_en_5.1.1_3.0_1694559423925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("prop_wiki","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("prop_wiki", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prop_wiki| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/xyma/PROP-wiki \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-psych_search_en.md b/docs/_posts/ahmedlone127/2023-09-12-psych_search_en.md new file mode 100644 index 00000000000000..48d5a48e6daeaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-psych_search_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English psych_search BertEmbeddings from nlp4good +author: John Snow Labs +name: psych_search +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `psych_search` is an English model originally trained by nlp4good. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/psych_search_en_5.1.1_3.0_1694560934620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/psych_search_en_5.1.1_3.0_1694560934620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("psych_search","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("psych_search", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|psych_search| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/nlp4good/psych-search \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-push_hub_test_en.md b/docs/_posts/ahmedlone127/2023-09-12-push_hub_test_en.md new file mode 100644 index 00000000000000..b95fccd27651ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-push_hub_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English push_hub_test BertEmbeddings from ksmcg +author: John Snow Labs +name: push_hub_test +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `push_hub_test` is an English model originally trained by ksmcg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/push_hub_test_en_5.1.1_3.0_1694510535660.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/push_hub_test_en_5.1.1_3.0_1694510535660.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("push_hub_test","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("push_hub_test", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|push_hub_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/ksmcg/push_hub_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-quranexe_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-quranexe_bert_en.md new file mode 100644 index 00000000000000..4bbdd21ce194d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-quranexe_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English quranexe_bert BertEmbeddings from mustapha +author: John Snow Labs +name: quranexe_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `quranexe_bert` is an English model originally trained by mustapha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quranexe_bert_en_5.1.1_3.0_1694558398605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quranexe_bert_en_5.1.1_3.0_1694558398605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("quranexe_bert","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("quranexe_bert", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quranexe_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.6 MB| + +## References + +https://huggingface.co/mustapha/QuranExe-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_clean_steps_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_clean_steps_en.md new file mode 100644 index 00000000000000..417c168c798e0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_clean_steps_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_clean_steps BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_clean_steps +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_clean_steps` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_clean_steps_en_5.1.1_3.0_1694553199740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_clean_steps_en_5.1.1_3.0_1694553199740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("recipe_clean_steps","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("recipe_clean_steps", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_clean_steps| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/paola-md/recipe-clean_steps \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_icelandic_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_icelandic_en.md new file mode 100644 index 00000000000000..1cb03fb36da554 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_icelandic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_icelandic BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_icelandic +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_icelandic` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_icelandic_en_5.1.1_3.0_1694552871118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_icelandic_en_5.1.1_3.0_1694552871118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# `data` is assumed to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so the pipeline needs a tokenizer stage.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("recipe_icelandic","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// `data` is assumed to be a DataFrame with a "text" column.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// Produces the "token" column that BertEmbeddings lists among its inputs.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("recipe_icelandic", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_icelandic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/paola-md/recipe-is \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_tis_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_tis_en.md new file mode 100644 index 00000000000000..729cacd4bd5546 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_tis_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_tis BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_tis +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_tis` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_tis_en_5.1.1_3.0_1694552674837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_tis_en_5.1.1_3.0_1694552674837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("recipe_tis","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("recipe_tis", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_tis| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.5 MB| + +## References + +https://huggingface.co/paola-md/recipe-tis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_tsonga_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_tsonga_en.md new file mode 100644 index 00000000000000..98ebf54472d533 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_tsonga_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_tsonga BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_tsonga +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_tsonga` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_tsonga_en_5.1.1_3.0_1694553036897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_tsonga_en_5.1.1_3.0_1694553036897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("recipe_tsonga","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("recipe_tsonga", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_tsonga| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/paola-md/recipe-ts \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-regex_gb_2021_en.md b/docs/_posts/ahmedlone127/2023-09-12-regex_gb_2021_en.md new file mode 100644 index 00000000000000..f2188a7025182c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-regex_gb_2021_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English regex_gb_2021 BertEmbeddings from mossaic-candle +author: John Snow Labs +name: regex_gb_2021 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `regex_gb_2021` is an English model originally trained by mossaic-candle. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/regex_gb_2021_en_5.1.1_3.0_1694508052972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/regex_gb_2021_en_5.1.1_3.0_1694508052972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("regex_gb_2021","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("regex_gb_2021", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|regex_gb_2021| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.7 MB| + +## References + +https://huggingface.co/mossaic-candle/regex-gb-2021 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-relu_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-relu_bert_base_uncased_en.md new file mode 100644 index 00000000000000..6d7f6bb1a6785b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-relu_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English relu_bert_base_uncased BertEmbeddings from mpiorczynski +author: John Snow Labs +name: relu_bert_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `relu_bert_base_uncased` is an English model originally trained by mpiorczynski. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/relu_bert_base_uncased_en_5.1.1_3.0_1694552781766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/relu_bert_base_uncased_en_5.1.1_3.0_1694552781766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("relu_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("relu_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|relu_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mpiorczynski/relu-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-review_en.md b/docs/_posts/ahmedlone127/2023-09-12-review_en.md new file mode 100644 index 00000000000000..197f37101b5c9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-review_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English review BertEmbeddings from Hikam22 +author: John Snow Labs +name: review +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `review` is an English model originally trained by Hikam22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/review_en_5.1.1_3.0_1694548928369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/review_en_5.1.1_3.0_1694548928369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("review","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("review", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|review| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|304.8 MB| + +## References + +https://huggingface.co/Hikam22/Review \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sample_model_en.md b/docs/_posts/ahmedlone127/2023-09-12-sample_model_en.md new file mode 100644 index 00000000000000..912b4592909e62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sample_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sample_model BertEmbeddings from paopao0226 +author: John Snow Labs +name: sample_model +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sample_model` is an English model originally trained by paopao0226. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sample_model_en_5.1.1_3.0_1694509238918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sample_model_en_5.1.1_3.0_1694509238918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sample_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sample_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sample_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/paopao0226/sample-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-satd_identify_en.md b/docs/_posts/ahmedlone127/2023-09-12-satd_identify_en.md new file mode 100644 index 00000000000000..e993ed0a4e0b9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-satd_identify_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English satd_identify BertEmbeddings from aavvvv +author: John Snow Labs +name: satd_identify +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `satd_identify` is an English model originally trained by aavvvv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/satd_identify_en_5.1.1_3.0_1694557773164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/satd_identify_en_5.1.1_3.0_1694557773164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("satd_identify","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("satd_identify", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|satd_identify| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/aavvvv/satd-identify \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-scibert_lm_const_finetuned_20_en.md b/docs/_posts/ahmedlone127/2023-09-12-scibert_lm_const_finetuned_20_en.md new file mode 100644 index 00000000000000..95bdfad4001cd1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-scibert_lm_const_finetuned_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_lm_const_finetuned_20 BertEmbeddings from ariesutiono +author: John Snow Labs +name: scibert_lm_const_finetuned_20 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_lm_const_finetuned_20` is an English model originally trained by ariesutiono. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_lm_const_finetuned_20_en_5.1.1_3.0_1694559809758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_lm_const_finetuned_20_en_5.1.1_3.0_1694559809758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_lm_const_finetuned_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_lm_const_finetuned_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_lm_const_finetuned_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|499.5 MB| + +## References + +https://huggingface.co/ariesutiono/scibert-lm-const-finetuned-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_finetuned_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_finetuned_cord19_en.md new file mode 100644 index 00000000000000..07418accd72d53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_finetuned_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_finetuned_cord19 BertEmbeddings from mrm8488 +author: John Snow Labs +name: scibert_scivocab_finetuned_cord19 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_finetuned_cord19` is an English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_finetuned_cord19_en_5.1.1_3.0_1694556414909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_finetuned_cord19_en_5.1.1_3.0_1694556414909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_finetuned_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_finetuned_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_finetuned_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/mrm8488/scibert_scivocab-finetuned-CORD19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_uncased_ft_mlm_sdu21_ai_en.md b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_uncased_ft_mlm_sdu21_ai_en.md new file mode 100644 index 00000000000000..9565c044e1500c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_uncased_ft_mlm_sdu21_ai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_ft_mlm_sdu21_ai BertEmbeddings from napsternxg +author: John Snow Labs +name: scibert_scivocab_uncased_ft_mlm_sdu21_ai +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_uncased_ft_mlm_sdu21_ai` is an English model originally trained by napsternxg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_ft_mlm_sdu21_ai_en_5.1.1_3.0_1694557218055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_ft_mlm_sdu21_ai_en_5.1.1_3.0_1694557218055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_uncased_ft_mlm_sdu21_ai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_ft_mlm_sdu21_ai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_ft_mlm_sdu21_ai| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/napsternxg/scibert_scivocab_uncased_ft_mlm_SDU21_AI \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sec_bert_num_en.md b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_num_en.md new file mode 100644 index 00000000000000..469ff415dafe13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_num_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sec_bert_num BertEmbeddings from nlpaueb +author: John Snow Labs +name: sec_bert_num +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sec_bert_num` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sec_bert_num_en_5.1.1_3.0_1694561918110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sec_bert_num_en_5.1.1_3.0_1694561918110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sec_bert_num","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sec_bert_num", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sec_bert_num| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/nlpaueb/sec-bert-num \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sec_bert_shape_en.md b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_shape_en.md new file mode 100644 index 00000000000000..8a2d6149f908f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_shape_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sec_bert_shape BertEmbeddings from nlpaueb +author: John Snow Labs +name: sec_bert_shape +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sec_bert_shape` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sec_bert_shape_en_5.1.1_3.0_1694562038680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sec_bert_shape_en_5.1.1_3.0_1694562038680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sec_bert_shape","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sec_bert_shape", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sec_bert_shape| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/nlpaueb/sec-bert-shape \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_base_uncased_finetuned_with_haystack_en.md b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_base_uncased_finetuned_with_haystack_en.md new file mode 100644 index 00000000000000..6edab9d5cd7a1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_base_uncased_finetuned_with_haystack_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English set_date_1_bert_base_uncased_finetuned_with_haystack BertEmbeddings from motiondew +author: John Snow Labs +name: set_date_1_bert_base_uncased_finetuned_with_haystack +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `set_date_1_bert_base_uncased_finetuned_with_haystack` is an English model originally trained by motiondew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/set_date_1_bert_base_uncased_finetuned_with_haystack_en_5.1.1_3.0_1694556116394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/set_date_1_bert_base_uncased_finetuned_with_haystack_en_5.1.1_3.0_1694556116394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("set_date_1_bert_base_uncased_finetuned_with_haystack","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("set_date_1_bert_base_uncased_finetuned_with_haystack", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|set_date_1_bert_base_uncased_finetuned_with_haystack| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/motiondew/set_date_1_bert-base-uncased_finetuned_with_haystack \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_en.md new file mode 100644 index 00000000000000..bf626cb2fa66c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English set_date_1_bert BertEmbeddings from motiondew +author: John Snow Labs +name: set_date_1_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `set_date_1_bert` is an English model originally trained by motiondew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/set_date_1_bert_en_5.1.1_3.0_1694555975958.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/set_date_1_bert_en_5.1.1_3.0_1694555975958.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("set_date_1_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("set_date_1_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|set_date_1_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/motiondew/set_date_1-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_custom_tokenizer_en.md new file mode 100644 index 00000000000000..b8ea3e2415a253 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_conll2003_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_conll2003_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_conll2003_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694549308806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694549308806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_conll2003_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_conll2003_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_conll2003_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-conll2003-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_en.md new file mode 100644 index 00000000000000..c76e180e686d07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_conll2003 BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_conll2003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_conll2003` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_en_5.1.1_3.0_1694549226176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_en_5.1.1_3.0_1694549226176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_conll2003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_conll2003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_conll2003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_from_scratch_en.md new file mode 100644 index 00000000000000..07fbaa12c1f3cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_conll2003_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_conll2003_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_conll2003_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555844684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555844684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_conll2003_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_conll2003_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_conll2003_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-conll2003-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_en.md new file mode 100644 index 00000000000000..6d9d9821b8bb12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694509168848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694509168848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_cola_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.7 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..4174b6cd63d5ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561193785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561193785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_cola_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|112.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_en.md new file mode 100644 index 00000000000000..8f757685b7a828 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_en_5.1.1_3.0_1694508239344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_en_5.1.1_3.0_1694508239344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_cola","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..24e218dc859816 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561372145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561372145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|112.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..c31269ced1c35b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694509261561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694509261561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_mnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..043b61fafdca4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561610937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561610937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_mnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|132.3 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_en.md new file mode 100644 index 00000000000000..2332ba65e00450 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_en_5.1.1_3.0_1694508322641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_en_5.1.1_3.0_1694508322641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_mnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..4e699bc0e5e97b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561907695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561907695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|132.3 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_en.md new file mode 100644 index 00000000000000..bd525709fd0d84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694509478690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694509478690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..38d99dfd891555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562480371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562480371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_en.md new file mode 100644 index 00000000000000..bbb332065ce655 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_en_5.1.1_3.0_1694508535779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_en_5.1.1_3.0_1694508535779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..52fbd586d3f702 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562732313.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562732313.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..97470becbbd9c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694509564873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694509564873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_en.md new file mode 100644 index 00000000000000..71498ecaadd4d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_en_5.1.1_3.0_1694508618886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_en_5.1.1_3.0_1694508618886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..045f2965f76d2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694552340480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694552340480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_custom_tokenizer_en.md new file mode 100644 index 00000000000000..bd5955a1f6e764 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qqp_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qqp_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qqp_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694509649898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694509649898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qqp_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qqp_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qqp_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qqp-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_en.md new file mode 100644 index 00000000000000..6e4f20b08ad113 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qqp BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qqp +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qqp` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_en_5.1.1_3.0_1694508712348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_en_5.1.1_3.0_1694508712348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qqp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qqp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qqp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qqp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_custom_tokenizer_en.md new file mode 100644 index 00000000000000..4d21a83f168089 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694509735612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694509735612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Convert the raw "text" column into document annotations stored in "documents". +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +embeddings = BertEmbeddings.pretrained("small_mlm_glue_rte_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Convert the raw "text" column into document annotations stored in "documents". +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// NOTE(review): no stage here produces the "token" column listed in setInputCols; presumably a Tokenizer stage is needed - confirm. +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_en.md new file mode 100644 index 00000000000000..e259afbe76f735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_en_5.1.1_3.0_1694508793653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_en_5.1.1_3.0_1694508793653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_rte","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_custom_tokenizer_en.md new file mode 100644 index 00000000000000..39f5f36b12d5e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694509835393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694509835393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_sst2_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_en.md new file mode 100644 index 00000000000000..0ccb73aaf94f88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2 BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_en_5.1.1_3.0_1694508920238.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_en_5.1.1_3.0_1694508920238.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_sst2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_custom_tokenizer_en.md new file mode 100644 index 00000000000000..99565eded80454 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694509912489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694509912489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_stsb_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_en.md new file mode 100644 index 00000000000000..bfa92c4f11fa24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_en_5.1.1_3.0_1694509008527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_en_5.1.1_3.0_1694509008527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..5a24f818bbf09a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_wnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_wnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_wnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510024053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510024053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_wnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_wnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_wnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.7 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-wnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_en.md new file mode 100644 index 00000000000000..e7ad7199f2b60b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_wnli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_wnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_wnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_en_5.1.1_3.0_1694509092224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_en_5.1.1_3.0_1694509092224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_wnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_wnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_wnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-wnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_imdb_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_imdb_from_scratch_en.md new file mode 100644 index 00000000000000..435fc8604cbd35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_imdb_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_imdb_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_imdb_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_imdb_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556494575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556494575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_imdb_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_imdb_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_imdb_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-imdb-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_custom_tokenizer_en.md new file mode 100644 index 00000000000000..d6072ae617d93e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_rotten_tomatoes_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_rotten_tomatoes_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_rotten_tomatoes_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694549132801.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694549132801.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_rotten_tomatoes_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_rotten_tomatoes_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_rotten_tomatoes_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-rotten_tomatoes-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_en.md new file mode 100644 index 00000000000000..e6d871875cbae0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_rotten_tomatoes BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_rotten_tomatoes +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_rotten_tomatoes` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_en_5.1.1_3.0_1694549024792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_en_5.1.1_3.0_1694549024792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_rotten_tomatoes","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_rotten_tomatoes", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_rotten_tomatoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-rotten_tomatoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_from_scratch_en.md new file mode 100644 index 00000000000000..f02bcd6dc7d0e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_rotten_tomatoes_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_rotten_tomatoes_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_rotten_tomatoes_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694556082715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694556082715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_rotten_tomatoes_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_rotten_tomatoes_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_rotten_tomatoes_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-rotten_tomatoes-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..b7baa5d9ae69d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_snli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_snli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_snli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549568395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549568395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_snli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_snli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_snli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.7 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-snli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_en.md new file mode 100644 index 00000000000000..7184e8b0f81254 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_snli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_snli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_snli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_snli_en_5.1.1_3.0_1694549486115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_snli_en_5.1.1_3.0_1694549486115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_snli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_snli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_snli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-snli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_from_scratch_en.md new file mode 100644 index 00000000000000..af74e062bd8cd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_snli_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_snli_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_snli_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_snli_from_scratch_en_5.1.1_3.0_1694556408270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_snli_from_scratch_en_5.1.1_3.0_1694556408270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_snli_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_snli_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_snli_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-snli-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_custom_tokenizer_en.md new file mode 100644 index 00000000000000..71c53adcb7862f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_squad_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_squad_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_squad_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547690830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547690830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_squad_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_squad_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_squad_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-squad-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_en.md new file mode 100644 index 00000000000000..88957ff7bd7aa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_squad BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_squad +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_squad` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_squad_en_5.1.1_3.0_1694547514364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_squad_en_5.1.1_3.0_1694547514364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_tweet_eval_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_tweet_eval_from_scratch_en.md new file mode 100644 index 00000000000000..ffba4d5978e137 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_tweet_eval_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_tweet_eval_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_tweet_eval_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_tweet_eval_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555281014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555281014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_tweet_eval_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_tweet_eval_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_tweet_eval_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-tweet_eval-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_custom_tokenizer_en.md new file mode 100644 index 00000000000000..a678a8c8a89aec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548914392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548914392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_en.md new file mode 100644 index 00000000000000..5addb769947b43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_en_5.1.1_3.0_1694548827535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_en_5.1.1_3.0_1694548827535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..7b18c3d6f73109 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554676967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554676967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_en.md new file mode 100644 index 00000000000000..3effc5c3c26392 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694556248548.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694556248548.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sp_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-sp_bert_en.md new file mode 100644 index 00000000000000..63763e7dbc4f14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sp_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sp_bert BertEmbeddings from tumd +author: John Snow Labs +name: sp_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sp_bert` is an English model originally trained by tumd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sp_bert_en_5.1.1_3.0_1694549714006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sp_bert_en_5.1.1_3.0_1694549714006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("sp_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sp_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sp_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.7 MB| + +## References + +https://huggingface.co/tumd/sp-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e2_en.md new file mode 100644 index 00000000000000..ba92b1d931e349 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_bert_e2 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_bert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_bert_e2` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_bert_e2_en_5.1.1_3.0_1694508246464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_bert_e2_en_5.1.1_3.0_1694508246464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ssci_bert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_bert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_bert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-BERT-e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e4_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e4_en.md new file mode 100644 index 00000000000000..b64ed6bbf787a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_bert_e4 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_bert_e4 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_bert_e4` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_bert_e4_en_5.1.1_3.0_1694508373404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_bert_e4_en_5.1.1_3.0_1694508373404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ssci_bert_e4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_bert_e4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_bert_e4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-BERT-e4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e2_en.md new file mode 100644 index 00000000000000..1e99d99f79b07a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_scibert_e2 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_scibert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_scibert_e2` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_scibert_e2_en_5.1.1_3.0_1694508512306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_scibert_e2_en_5.1.1_3.0_1694508512306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ssci_scibert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_scibert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_scibert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-SciBERT-e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e4_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e4_en.md new file mode 100644 index 00000000000000..092e2a997f643a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_scibert_e4 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_scibert_e4 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_scibert_e4` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_scibert_e4_en_5.1.1_3.0_1694508639778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_scibert_e4_en_5.1.1_3.0_1694508639778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ssci_scibert_e4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_scibert_e4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_scibert_e4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-SciBERT-e4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-telugu_bertu_te.md b/docs/_posts/ahmedlone127/2023-09-12-telugu_bertu_te.md new file mode 100644 index 00000000000000..feb4cc314f2447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-telugu_bertu_te.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Telugu telugu_bertu BertEmbeddings from kuppuluri +author: John Snow Labs +name: telugu_bertu +date: 2023-09-12 +tags: [bert, te, open_source, fill_mask, onnx] +task: Embeddings +language: te +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `telugu_bertu` is a Telugu model originally trained by kuppuluri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/telugu_bertu_te_5.1.1_3.0_1694510680655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/telugu_bertu_te_5.1.1_3.0_1694510680655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("telugu_bertu","te") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("telugu_bertu", "te") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|telugu_bertu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|te| +|Size:|412.5 MB| + +## References + +https://huggingface.co/kuppuluri/telugu_bertu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-test_model_nws_en.md b/docs/_posts/ahmedlone127/2023-09-12-test_model_nws_en.md new file mode 100644 index 00000000000000..9c8ca16c8f2d54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-test_model_nws_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_model_nws BertEmbeddings from nws +author: John Snow Labs +name: test_model_nws +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_model_nws` is an English model originally trained by nws. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_model_nws_en_5.1.1_3.0_1694562663160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_model_nws_en_5.1.1_3.0_1694562663160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test_model_nws","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_model_nws", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_model_nws| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/nws/test_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_bert_turkish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_bert_turkish_cased_en.md new file mode 100644 index 00000000000000..98cdccd4f0dfac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_bert_turkish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_bert_turkish_cased BertEmbeddings from uygarkurt +author: John Snow Labs +name: tiny_bert_turkish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_bert_turkish_cased` is an English model originally trained by uygarkurt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_bert_turkish_cased_en_5.1.1_3.0_1694509025179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_bert_turkish_cased_en_5.1.1_3.0_1694509025179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_bert_turkish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_bert_turkish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_bert_turkish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|17.4 MB| + +## References + +https://huggingface.co/uygarkurt/tiny-bert-turkish-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_custom_tokenizer_en.md new file mode 100644 index 00000000000000..3ad78833fd3a6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_conll2003_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_conll2003_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_conll2003_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694548724810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694548724810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_conll2003_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_conll2003_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_conll2003_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-conll2003-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_en.md new file mode 100644 index 00000000000000..c6797bc36f72fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_conll2003 BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_conll2003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_conll2003` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_en_5.1.1_3.0_1694548620549.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_en_5.1.1_3.0_1694548620549.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_conll2003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_conll2003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_conll2003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_from_scratch_en.md new file mode 100644 index 00000000000000..7467ed42fd81a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_conll2003_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_conll2003_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_conll2003_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555420238.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555420238.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_conll2003_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_conll2003_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_conll2003_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-conll2003-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_en.md new file mode 100644 index 00000000000000..3c2eeb688abfad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_cola_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_cola_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_cola_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694510104742.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694510104742.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_cola_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_cola_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_cola_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-cola-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..acaa7b9bf20156 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_cola_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_cola_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_cola_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561081439.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561081439.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_cola_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_cola_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_cola_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|17.9 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-cola-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..f74c75c1c82133 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561266748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561266748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|17.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-cola-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..5b3b531154c3ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694510167051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694510167051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_mnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..7de7e773837c40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561453097.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561453097.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|23.0 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..963f638a4de5a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561530936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561530936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|22.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_en.md new file mode 100644 index 00000000000000..5fbfae07caa856 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mrpc_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mrpc_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mrpc_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694510364675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694510364675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_mrpc_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mrpc_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mrpc_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mrpc-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..a997aa904cf11f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561983732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561983732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mrpc-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..270d4f0f4faaad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694510431505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694510431505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..156a579db32ff1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562076733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562076733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|23.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..0e3fca53ddbf77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694551714525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694551714525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..478509e8a9a56e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562151869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562151869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|23.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_en.md new file mode 100644 index 00000000000000..6909097034f0a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qqp_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qqp_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qqp_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694510502065.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694510502065.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qqp_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qqp_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qqp_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qqp-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..3cf412c1eaba1d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562240997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562240997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|20.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qqp-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_en.md new file mode 100644 index 00000000000000..7246fe54c0f393 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694510577450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694510577450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_rte_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..09ab6e1ca7c962 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562388375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562388375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_rte_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|21.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_en.md new file mode 100644 index 00000000000000..05284e64932d0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_en_5.1.1_3.0_1694507941819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_en_5.1.1_3.0_1694507941819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_rte","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..e19640033c6f5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562556409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562556409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|21.2 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_en.md new file mode 100644 index 00000000000000..ba54ab1e43eaac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_sst2_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694510651635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694510651635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..f9dad70b89ea83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562631751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562631751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|20.1 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_en.md new file mode 100644 index 00000000000000..6933cb5f41fb70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2 BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_sst2` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_en_5.1.1_3.0_1694508015211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_en_5.1.1_3.0_1694508015211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..61d3b9de6cc152 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562828413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562828413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.9 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_en.md new file mode 100644 index 00000000000000..262b9d228c901d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_stsb_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694510712289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694510712289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..33f2db9d861ce5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562907425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562907425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.3 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_en.md new file mode 100644 index 00000000000000..f328d8d1a3cfcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_stsb` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_en_5.1.1_3.0_1694508086728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_en_5.1.1_3.0_1694508086728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..0bfa6281e1b19d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563003418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563003418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.1 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..88db108e7cdc39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_wnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510789128.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510789128.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_wnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..da84a639e3f14d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563181376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563181376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.9 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_en.md new file mode 100644 index 00000000000000..94cd0e4bf27200 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_wnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_en_5.1.1_3.0_1694508154889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_en_5.1.1_3.0_1694508154889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_wnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_imdb_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_imdb_from_scratch_en.md new file mode 100644 index 00000000000000..369e2feb274541 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_imdb_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_imdb_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_imdb_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_imdb_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556163748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556163748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_imdb_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_imdb_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_imdb_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-imdb-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_custom_tokenizer_en.md new file mode 100644 index 00000000000000..d39c2419c31401 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_rotten_tomatoes_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_rotten_tomatoes_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_rotten_tomatoes_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694548541463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694548541463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_rotten_tomatoes_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_rotten_tomatoes_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_rotten_tomatoes_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-rotten_tomatoes-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_en.md new file mode 100644 index 00000000000000..12fa5aab55c4b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_rotten_tomatoes BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_rotten_tomatoes +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_rotten_tomatoes` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_en_5.1.1_3.0_1694548478733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_en_5.1.1_3.0_1694548478733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_rotten_tomatoes","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_rotten_tomatoes", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_rotten_tomatoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-rotten_tomatoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_from_scratch_en.md new file mode 100644 index 00000000000000..6daac517dfb2b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_rotten_tomatoes_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_rotten_tomatoes_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_rotten_tomatoes_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694555692732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694555692732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_rotten_tomatoes_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_rotten_tomatoes_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_rotten_tomatoes_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-rotten_tomatoes-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..4ca9d57aca3214 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_snli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_snli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_snli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549373032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549373032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_snli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_snli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_snli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-snli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_from_scratch_en.md new file mode 100644 index 00000000000000..c9ceda951d0c43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_snli_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_snli_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_snli_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_from_scratch_en_5.1.1_3.0_1694555995371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_from_scratch_en_5.1.1_3.0_1694555995371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_snli_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_snli_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_snli_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-snli-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_custom_tokenizer_en.md new file mode 100644 index 00000000000000..e0dcd2d50a5ab6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_squad_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_squad_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_squad_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547599498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547599498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_squad_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_squad_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_squad_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-squad-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_en.md new file mode 100644 index 00000000000000..44e83102346a03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_squad BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_squad +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_squad` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_en_5.1.1_3.0_1694547423091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_en_5.1.1_3.0_1694547423091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_tweet_eval_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_tweet_eval_from_scratch_en.md new file mode 100644 index 00000000000000..3105b80e311ead --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_tweet_eval_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_tweet_eval_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_tweet_eval_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_tweet_eval_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555163619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555163619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_tweet_eval_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_tweet_eval_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_tweet_eval_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-tweet_eval-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_custom_tokenizer_en.md new file mode 100644 index 00000000000000..23a14f4c6ed1a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548410883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548410883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_en.md new file mode 100644 index 00000000000000..718a20122eed20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_en_5.1.1_3.0_1694548328415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_en_5.1.1_3.0_1694548328415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..1afc42653b28cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554282672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554282672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_en.md new file mode 100644 index 00000000000000..7e8d46e34f0bee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694555916739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694555916739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-twitch_bert_base_cased_pytorch_en.md b/docs/_posts/ahmedlone127/2023-09-12-twitch_bert_base_cased_pytorch_en.md new file mode 100644 index 00000000000000..da34a0b57d8d48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-twitch_bert_base_cased_pytorch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English twitch_bert_base_cased_pytorch BertEmbeddings from veb +author: John Snow Labs +name: twitch_bert_base_cased_pytorch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `twitch_bert_base_cased_pytorch` is an English model originally trained by veb. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitch_bert_base_cased_pytorch_en_5.1.1_3.0_1694554479279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitch_bert_base_cased_pytorch_en_5.1.1_3.0_1694554479279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("twitch_bert_base_cased_pytorch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("twitch_bert_base_cased_pytorch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitch_bert_base_cased_pytorch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/veb/twitch-bert-base-cased-pytorch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e1_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e1_en.md new file mode 100644 index 00000000000000..dad45e9deeb701 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultbert_e1 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultbert_e1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultbert_e1` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultbert_e1_en_5.1.1_3.0_1694550516279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultbert_e1_en_5.1.1_3.0_1694550516279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultbert_e1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ugmultbert_e1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultbert_e1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.4 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultBERT_e1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e2_en.md new file mode 100644 index 00000000000000..e4235ef45efb29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultbert_e2 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultbert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultbert_e2` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultbert_e2_en_5.1.1_3.0_1694551678403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultbert_e2_en_5.1.1_3.0_1694551678403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultbert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ugmultbert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultbert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.2 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultBERT_e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e3_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e3_en.md new file mode 100644 index 00000000000000..63facb54a8d3a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultbert_e3 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultbert_e3 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultbert_e3` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultbert_e3_en_5.1.1_3.0_1694551842232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultbert_e3_en_5.1.1_3.0_1694551842232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultbert_e3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ugmultbert_e3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultbert_e3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.0 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultBERT_e3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultmtokbert_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultmtokbert_3e_en.md new file mode 100644 index 00000000000000..0c2e7e90013903 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultmtokbert_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultmtokbert_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultmtokbert_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultmtokbert_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultmtokbert_3e_en_5.1.1_3.0_1694551069053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultmtokbert_3e_en_5.1.1_3.0_1694551069053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultmtokbert_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ugmultmtokbert_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultmtokbert_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|742.6 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultMTokBERT_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e1_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e1_en.md new file mode 100644 index 00000000000000..841bbafc098785 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugturkbert_e1 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugturkbert_e1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugturkbert_e1` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugturkbert_e1_en_5.1.1_3.0_1694551986988.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugturkbert_e1_en_5.1.1_3.0_1694551986988.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugturkbert_e1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ugturkbert_e1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugturkbert_e1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.3 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgTurkBERT_e1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e2_en.md new file mode 100644 index 00000000000000..1cb2829bc16a32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugturkbert_e2 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugturkbert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugturkbert_e2` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugturkbert_e2_en_5.1.1_3.0_1694552141789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugturkbert_e2_en_5.1.1_3.0_1694552141789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugturkbert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ugturkbert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugturkbert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.2 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgTurkBERT_e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e3_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e3_en.md new file mode 100644 index 00000000000000..5d30aadeabb4c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugturkbert_e3 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugturkbert_e3 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugturkbert_e3` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugturkbert_e3_en_5.1.1_3.0_1694552327758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugturkbert_e3_en_5.1.1_3.0_1694552327758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugturkbert_e3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ugturkbert_e3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugturkbert_e3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgTurkBERT_e3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-xbert2_en.md b/docs/_posts/ahmedlone127/2023-09-12-xbert2_en.md new file mode 100644 index 00000000000000..4f76afc566c76a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-xbert2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English xbert2 BertEmbeddings from tmc +author: John Snow Labs +name: xbert2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xbert2` is an English model originally trained by tmc. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xbert2_en_5.1.1_3.0_1694551885455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xbert2_en_5.1.1_3.0_1694551885455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("xbert2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("xbert2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xbert2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/tmc/xbert2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_1_en.md new file mode 100644 index 00000000000000..d0577d6f17857a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English absa_mlm_1 BertEmbeddings from UchihaMadara +author: John Snow Labs +name: absa_mlm_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `absa_mlm_1` is an English model originally trained by UchihaMadara. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/absa_mlm_1_en_5.1.1_3.0_1694579162814.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/absa_mlm_1_en_5.1.1_3.0_1694579162814.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("absa_mlm_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("absa_mlm_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|absa_mlm_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/UchihaMadara/absa-mlm-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-adrbert_base_p1_en.md b/docs/_posts/ahmedlone127/2023-09-13-adrbert_base_p1_en.md new file mode 100644 index 00000000000000..0caa21b7ce0587 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-adrbert_base_p1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English adrbert_base_p1 BertEmbeddings from adriansyahdr +author: John Snow Labs +name: adrbert_base_p1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `adrbert_base_p1` is an English model originally trained by adriansyahdr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/adrbert_base_p1_en_5.1.1_3.0_1694576788237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/adrbert_base_p1_en_5.1.1_3.0_1694576788237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("adrbert_base_p1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("adrbert_base_p1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|adrbert_base_p1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|470.1 MB| + +## References + +https://huggingface.co/adriansyahdr/adrBert-base-p1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_linhd_postdata_xx.md b/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_linhd_postdata_xx.md new file mode 100644 index 00000000000000..b1f4da96e304e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_linhd_postdata_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual alberti_bert_base_multilingual_cased_linhd_postdata BertEmbeddings from linhd-postdata +author: John Snow Labs +name: alberti_bert_base_multilingual_cased_linhd_postdata +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `alberti_bert_base_multilingual_cased_linhd_postdata` is a Multilingual model originally trained by linhd-postdata. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alberti_bert_base_multilingual_cased_linhd_postdata_xx_5.1.1_3.0_1694577289966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alberti_bert_base_multilingual_cased_linhd_postdata_xx_5.1.1_3.0_1694577289966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("alberti_bert_base_multilingual_cased_linhd_postdata","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("alberti_bert_base_multilingual_cased_linhd_postdata", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alberti_bert_base_multilingual_cased_linhd_postdata| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|664.4 MB| + +## References + +https://huggingface.co/linhd-postdata/alberti-bert-base-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_base_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_base_arabertv2_en.md new file mode 100644 index 00000000000000..46c5273fbbb754 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_base_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_base_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_base_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_base_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_base_arabertv2_en_5.1.1_3.0_1694566807785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_base_arabertv2_en_5.1.1_3.0_1694566807785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_base_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_base_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_base_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.8 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-base-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_large_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_large_arabertv2_en.md new file mode 100644 index 00000000000000..470ed97e8f00d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_large_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_large_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_large_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_large_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_large_arabertv2_en_5.1.1_3.0_1694566592236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_large_arabertv2_en_5.1.1_3.0_1694566592236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_large_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_large_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_base_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_base_arabertv2_en.md new file mode 100644 index 00000000000000..0ed7d52a89de75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_base_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_nonumber_base_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_nonumber_base_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_nonumber_base_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_base_arabertv2_en_5.1.1_3.0_1694567513179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_base_arabertv2_en_5.1.1_3.0_1694567513179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_nonumber_base_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_nonumber_base_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_nonumber_base_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.8 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-Nonumber-base-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_large_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_large_arabertv2_en.md new file mode 100644 index 00000000000000..1f5104e2a5d6ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_large_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_nonumber_large_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_nonumber_large_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_nonumber_large_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_large_arabertv2_en_5.1.1_3.0_1694567316136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_large_arabertv2_en_5.1.1_3.0_1694567316136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_nonumber_large_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_nonumber_large_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_nonumber_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-Nonumber-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-applicanttrackingsystembert_en.md b/docs/_posts/ahmedlone127/2023-09-13-applicanttrackingsystembert_en.md new file mode 100644 index 00000000000000..7eb98f7748696d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-applicanttrackingsystembert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English applicanttrackingsystembert BertEmbeddings from Shushant +author: John Snow Labs +name: applicanttrackingsystembert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `applicanttrackingsystembert` is an English model originally trained by Shushant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/applicanttrackingsystembert_en_5.1.1_3.0_1694574178617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/applicanttrackingsystembert_en_5.1.1_3.0_1694574178617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("applicanttrackingsystembert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("applicanttrackingsystembert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|applicanttrackingsystembert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/Shushant/ApplicantTrackingSystemBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ara_dialectbert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-ara_dialectbert_ar.md new file mode 100644 index 00000000000000..4826748be7afc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ara_dialectbert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic ara_dialectbert BertEmbeddings from MutazYoune +author: John Snow Labs +name: ara_dialectbert +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ara_dialectbert` is an Arabic model originally trained by MutazYoune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ara_dialectbert_ar_5.1.1_3.0_1694567913022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ara_dialectbert_ar_5.1.1_3.0_1694567913022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ara_dialectbert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ara_dialectbert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ara_dialectbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|406.3 MB| + +## References + +https://huggingface.co/MutazYoune/Ara_DialectBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-arbert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-arbert_ar.md new file mode 100644 index 00000000000000..e3174cd469c425 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-arbert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arbert BertEmbeddings from UBC-NLP +author: John Snow Labs +name: arbert +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arbert` is an Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arbert_ar_5.1.1_3.0_1694573787008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arbert_ar_5.1.1_3.0_1694573787008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("arbert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arbert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|605.3 MB| + +## References + +https://huggingface.co/UBC-NLP/ARBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-archeobertje_en.md b/docs/_posts/ahmedlone127/2023-09-13-archeobertje_en.md new file mode 100644 index 00000000000000..95c2c34d71ee75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-archeobertje_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English archeobertje BertEmbeddings from alexbrandsen +author: John Snow Labs +name: archeobertje +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `archeobertje` is an English model originally trained by alexbrandsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/archeobertje_en_5.1.1_3.0_1694578129084.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/archeobertje_en_5.1.1_3.0_1694578129084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("archeobertje","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("archeobertje", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|archeobertje| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.5 MB| + +## References + +https://huggingface.co/alexbrandsen/ArcheoBERTje \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-author_identification_en.md b/docs/_posts/ahmedlone127/2023-09-13-author_identification_en.md new file mode 100644 index 00000000000000..36ba5b6015a9f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-author_identification_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English author_identification BertEmbeddings from Omar2027 +author: John Snow Labs +name: author_identification +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `author_identification` is an English model originally trained by Omar2027. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/author_identification_en_5.1.1_3.0_1694569409752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/author_identification_en_5.1.1_3.0_1694569409752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("author_identification","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("author_identification", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|author_identification| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/Omar2027/Author_identification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_bn.md b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_bn.md new file mode 100644 index 00000000000000..fa3ca966eae826 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_bn.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Bengali bangla_bert_base BertEmbeddings from sagorsarker +author: John Snow Labs +name: bangla_bert_base +date: 2023-09-13 +tags: [bert, bn, open_source, fill_mask, onnx] +task: Embeddings +language: bn +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bangla_bert_base` is a Bengali model originally trained by sagorsarker. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bangla_bert_base_bn_5.1.1_3.0_1694569397272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bangla_bert_base_bn_5.1.1_3.0_1694569397272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bangla_bert_base","bn") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bangla_bert_base", "bn") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bangla_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|bn| +|Size:|614.7 MB| + +## References + +https://huggingface.co/sagorsarker/bangla-bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_bn.md b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_bn.md new file mode 100644 index 00000000000000..33c51b5cdcf1ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_bn.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Bengali bangla_bert BertEmbeddings from Kowsher +author: John Snow Labs +name: bangla_bert +date: 2023-09-13 +tags: [bert, bn, open_source, fill_mask, onnx] +task: Embeddings +language: bn +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bangla_bert` is a Bengali model originally trained by Kowsher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bangla_bert_bn_5.1.1_3.0_1694564809999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bangla_bert_bn_5.1.1_3.0_1694564809999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bangla_bert","bn") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bangla_bert", "bn") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bangla_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|bn| +|Size:|612.1 MB| + +## References + +https://huggingface.co/Kowsher/bangla-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batterybert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batterybert_cased_en.md new file mode 100644 index 00000000000000..5ffeec3a3bcb64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batterybert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batterybert_cased BertEmbeddings from batterydata +author: John Snow Labs +name: batterybert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batterybert_cased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batterybert_cased_en_5.1.1_3.0_1694585277909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batterybert_cased_en_5.1.1_3.0_1694585277909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("batterybert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("batterybert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batterybert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/batterydata/batterybert-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batterybert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batterybert_uncased_en.md new file mode 100644 index 00000000000000..17256679f12a65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batterybert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batterybert_uncased BertEmbeddings from batterydata +author: John Snow Labs +name: batterybert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batterybert_uncased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batterybert_uncased_en_5.1.1_3.0_1694585447411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batterybert_uncased_en_5.1.1_3.0_1694585447411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("batterybert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("batterybert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batterybert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/batterydata/batterybert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bbc_gqa_eval_en.md b/docs/_posts/ahmedlone127/2023-09-13-bbc_gqa_eval_en.md new file mode 100644 index 00000000000000..8be4e06e21cb70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bbc_gqa_eval_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bbc_gqa_eval BertEmbeddings from rjbownes +author: John Snow Labs +name: bbc_gqa_eval +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bbc_gqa_eval` is an English model originally trained by rjbownes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bbc_gqa_eval_en_5.1.1_3.0_1694568579505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bbc_gqa_eval_en_5.1.1_3.0_1694568579505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bbc_gqa_eval","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bbc_gqa_eval", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bbc_gqa_eval| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/rjbownes/BBC-GQA-eval \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-beauty_base_klcp2_en.md b/docs/_posts/ahmedlone127/2023-09-13-beauty_base_klcp2_en.md new file mode 100644 index 00000000000000..c9e768c152c595 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-beauty_base_klcp2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English beauty_base_klcp2 BertEmbeddings from Kyoungmin +author: John Snow Labs +name: beauty_base_klcp2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `beauty_base_klcp2` is an English model originally trained by Kyoungmin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beauty_base_klcp2_en_5.1.1_3.0_1694565176499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beauty_base_klcp2_en_5.1.1_3.0_1694565176499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("beauty_base_klcp2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("beauty_base_klcp2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beauty_base_klcp2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/Kyoungmin/beauty-base-KLCP2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-berel_sivan22_he.md b/docs/_posts/ahmedlone127/2023-09-13-berel_sivan22_he.md new file mode 100644 index 00000000000000..5f965cde2c7c69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-berel_sivan22_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew berel_sivan22 BertEmbeddings from sivan22 +author: John Snow Labs +name: berel_sivan22 +date: 2023-09-13 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berel_sivan22` is a Hebrew model originally trained by sivan22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_sivan22_he_5.1.1_3.0_1694564843869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_sivan22_he_5.1.1_3.0_1694564843869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("berel_sivan22","he") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("berel_sivan22", "he") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_sivan22| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|690.1 MB| + +## References + +https://huggingface.co/sivan22/BEREL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_120_en.md new file mode 100644 index 00000000000000..3eef4b14c03791 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_120 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_120` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_120_en_5.1.1_3.0_1694585728673.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_120_en_5.1.1_3.0_1694585728673.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/gokuls/bert_base_120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_24_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_24_en.md new file mode 100644 index 00000000000000..2c9ff0193ede01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_24_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_24 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_24 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_24` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_24_en_5.1.1_3.0_1694580140710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_24_en_5.1.1_3.0_1694580140710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_24","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_24", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_24| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/gokuls/bert_base_24 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_48_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_48_en.md new file mode 100644 index 00000000000000..90b6f1e13f7ff3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_48_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_48 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_48 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_48` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_48_en_5.1.1_3.0_1694580489996.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_48_en_5.1.1_3.0_1694580489996.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_48","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_48", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_48| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/gokuls/bert_base_48 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_5lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_5lang_cased_xx.md new file mode 100644 index 00000000000000..61728a0b5a0df5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_5lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_5lang_cased BertEmbeddings from amine +author: John Snow Labs +name: bert_base_5lang_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_5lang_cased` is a Multilingual model originally trained by amine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_5lang_cased_xx_5.1.1_3.0_1694579745976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_5lang_cased_xx_5.1.1_3.0_1694579745976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_5lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_5lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_5lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|461.1 MB| + +## References + +https://huggingface.co/amine/bert-base-5lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_72_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_72_en.md new file mode 100644 index 00000000000000..05ade8a445f4af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_72_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_72 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_72 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_72` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_72_en_5.1.1_3.0_1694581572483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_72_en_5.1.1_3.0_1694581572483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_72","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_72", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_72| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/gokuls/bert_base_72 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_96_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_96_en.md new file mode 100644 index 00000000000000..866e65f7f7d285 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_96_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_96 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_96 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_96` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_96_en_5.1.1_3.0_1694581741068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_96_en_5.1.1_3.0_1694581741068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_96","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_96", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_96| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/gokuls/bert_base_96 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_ar.md new file mode 100644 index 00000000000000..67d9bda6177435 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabertv02 BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_base_arabertv02 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabertv02` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_ar_5.1.1_3.0_1694583153678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_ar_5.1.1_3.0_1694583153678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_arabertv02","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabertv02", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabertv02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|505.1 MB| + +## References + +https://huggingface.co/aubmindlab/bert-base-arabertv02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_twitter_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_twitter_ar.md new file mode 100644 index 00000000000000..7b2ce7d378f205 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_twitter_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabertv02_twitter BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_base_arabertv02_twitter +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabertv02_twitter` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_twitter_ar_5.1.1_3.0_1694583024700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_twitter_ar_5.1.1_3.0_1694583024700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_arabertv02_twitter","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabertv02_twitter", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabertv02_twitter| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|505.0 MB| + +## References + +https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_bert_yoga_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_bert_yoga_finetuned_en.md new file mode 100644 index 00000000000000..e9283fb3e215af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_bert_yoga_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_bert_yoga_finetuned BertEmbeddings from dsantistevan +author: John Snow Labs +name: bert_base_cased_bert_yoga_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_bert_yoga_finetuned` is an English model originally trained by dsantistevan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_bert_yoga_finetuned_en_5.1.1_3.0_1694566346146.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_bert_yoga_finetuned_en_5.1.1_3.0_1694566346146.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_bert_yoga_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_bert_yoga_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_bert_yoga_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/dsantistevan/bert-base-cased-bert-yoga-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..fbe7bb23bb5b01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_imdb BertEmbeddings from TimShieh +author: John Snow Labs +name: bert_base_cased_finetuned_imdb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_imdb` is an English model originally trained by TimShieh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_imdb_en_5.1.1_3.0_1694580692039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_imdb_en_5.1.1_3.0_1694580692039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/TimShieh/bert-base-cased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2017_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2017_mlm_en.md new file mode 100644 index 00000000000000..14698d2b675138 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2017_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_semeval2017_mlm BertEmbeddings from TimShieh +author: John Snow Labs +name: bert_base_cased_finetuned_semeval2017_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_semeval2017_mlm` is an English model originally trained by TimShieh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_semeval2017_mlm_en_5.1.1_3.0_1694580947575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_semeval2017_mlm_en_5.1.1_3.0_1694580947575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_semeval2017_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_semeval2017_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_semeval2017_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/TimShieh/bert-base-cased-finetuned-semeval2017-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_pt.md new file mode 100644 index 00000000000000..8335847887a8d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_base_cased_portuguese_lenerbr BertEmbeddings from pierreguillou +author: John Snow Labs +name: bert_base_cased_portuguese_lenerbr +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_portuguese_lenerbr` is a Portuguese model originally trained by pierreguillou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563345437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563345437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_portuguese_lenerbr","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_portuguese_lenerbr", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_portuguese_lenerbr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/pierreguillou/bert-base-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_en.md new file mode 100644 index 00000000000000..88af2281474189 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_dutch_cased BertEmbeddings from wietsedv +author: John Snow Labs +name: bert_base_dutch_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased` is an English model originally trained by wietsedv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_en_5.1.1_3.0_1694583992022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_en_5.1.1_3.0_1694583992022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/wietsedv/bert-base-dutch-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_finetuned_manx_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_finetuned_manx_en.md new file mode 100644 index 00000000000000..c1fd54a709286a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_finetuned_manx_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_dutch_cased_finetuned_manx BertEmbeddings from Pyjay +author: John Snow Labs +name: bert_base_dutch_cased_finetuned_manx +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_finetuned_manx` is an English model originally trained by Pyjay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_manx_en_5.1.1_3.0_1694569643622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_manx_en_5.1.1_3.0_1694569643622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased_finetuned_manx","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_finetuned_manx", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_finetuned_manx| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Pyjay/bert-base-dutch-cased-finetuned-gv \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_finetuned_wellness_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finetuned_wellness_en.md new file mode 100644 index 00000000000000..c57a02843b5909 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finetuned_wellness_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_finetuned_wellness BertEmbeddings from gaeunseo +author: John Snow Labs +name: bert_base_finetuned_wellness +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_finetuned_wellness` is an English model originally trained by gaeunseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_finetuned_wellness_en_5.1.1_3.0_1694583681541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_finetuned_wellness_en_5.1.1_3.0_1694583681541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_finetuned_wellness","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_finetuned_wellness", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_finetuned_wellness| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/gaeunseo/bert-base-finetuned-wellness \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_cased_v1_fi.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_cased_v1_fi.md new file mode 100644 index 00000000000000..c3de96cbd33ceb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_cased_v1_fi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Finnish bert_base_finnish_cased_v1 BertEmbeddings from TurkuNLP +author: John Snow Labs +name: bert_base_finnish_cased_v1 +date: 2023-09-13 +tags: [bert, fi, open_source, fill_mask, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_finnish_cased_v1` is a Finnish model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_finnish_cased_v1_fi_5.1.1_3.0_1694573404619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_finnish_cased_v1_fi_5.1.1_3.0_1694573404619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_finnish_cased_v1","fi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_finnish_cased_v1", "fi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_finnish_cased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fi| +|Size:|464.7 MB| + +## References + +https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_uncased_v1_fi.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_uncased_v1_fi.md new file mode 100644 index 00000000000000..902c15dd577caa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_uncased_v1_fi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Finnish bert_base_finnish_uncased_v1 BertEmbeddings from TurkuNLP +author: John Snow Labs +name: bert_base_finnish_uncased_v1 +date: 2023-09-13 +tags: [bert, fi, open_source, fill_mask, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_finnish_uncased_v1` is a Finnish model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_finnish_uncased_v1_fi_5.1.1_3.0_1694573578470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_finnish_uncased_v1_fi_5.1.1_3.0_1694573578470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_finnish_uncased_v1","fi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_finnish_uncased_v1", "fi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_finnish_uncased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fi| +|Size:|464.7 MB| + +## References + +https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_frozen_generics_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_frozen_generics_mlm_en.md new file mode 100644 index 00000000000000..a619f0ecb1195a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_frozen_generics_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_frozen_generics_mlm BertEmbeddings from sello-ralethe +author: John Snow Labs +name: bert_base_frozen_generics_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_frozen_generics_mlm` is an English model originally trained by sello-ralethe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_frozen_generics_mlm_en_5.1.1_3.0_1694572672553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_frozen_generics_mlm_en_5.1.1_3.0_1694572672553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_frozen_generics_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_frozen_generics_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_frozen_generics_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/sello-ralethe/bert-base-frozen-generics-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_historical_german_kinyarwanda_cased_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historical_german_kinyarwanda_cased_de.md new file mode 100644 index 00000000000000..54c81fefde4399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historical_german_kinyarwanda_cased_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_historical_german_kinyarwanda_cased BertEmbeddings from redewiedergabe +author: John Snow Labs +name: bert_base_historical_german_kinyarwanda_cased +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_historical_german_kinyarwanda_cased` is a German model originally trained by redewiedergabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_historical_german_kinyarwanda_cased_de_5.1.1_3.0_1694567053989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_historical_german_kinyarwanda_cased_de_5.1.1_3.0_1694567053989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_historical_german_kinyarwanda_cased","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_historical_german_kinyarwanda_cased", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_historical_german_kinyarwanda_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/redewiedergabe/bert-base-historical-german-rw-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_osiria_it.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_osiria_it.md new file mode 100644 index 00000000000000..d08ae6ea1fc640 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_osiria_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bert_base_italian_cased_osiria BertEmbeddings from osiria +author: John Snow Labs +name: bert_base_italian_cased_osiria +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_italian_cased_osiria` is an Italian model originally trained by osiria. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_italian_cased_osiria_it_5.1.1_3.0_1694568086773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_italian_cased_osiria_it_5.1.1_3.0_1694568086773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_italian_cased_osiria","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_italian_cased_osiria", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_italian_cased_osiria| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.0 MB| + +## References + +https://huggingface.co/osiria/bert-base-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_bulgarian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_bulgarian_cased_en.md new file mode 100644 index 00000000000000..87202748969a34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_bulgarian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_macedonian_bulgarian_cased BertEmbeddings from anon-submission-mk +author: John Snow Labs +name: bert_base_macedonian_bulgarian_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_macedonian_bulgarian_cased` is an English model originally trained by anon-submission-mk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_bulgarian_cased_en_5.1.1_3.0_1694580092524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_bulgarian_cased_en_5.1.1_3.0_1694580092524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_macedonian_bulgarian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_macedonian_bulgarian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_macedonian_bulgarian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.1 MB| + +## References + +https://huggingface.co/anon-submission-mk/bert-base-macedonian-bulgarian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_cased_en.md new file mode 100644 index 00000000000000..915665f614518f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_macedonian_cased BertEmbeddings from anon-submission-mk +author: John Snow Labs +name: bert_base_macedonian_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_macedonian_cased` is an English model originally trained by anon-submission-mk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_cased_en_5.1.1_3.0_1694580230869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_cased_en_5.1.1_3.0_1694580230869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_macedonian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_macedonian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_macedonian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/anon-submission-mk/bert-base-macedonian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_en.md new file mode 100644 index 00000000000000..ee2fc4b398a3b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_nli BertEmbeddings from binwang +author: John Snow Labs +name: bert_base_nli +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_nli` is an English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_nli_en_5.1.1_3.0_1694587924757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_nli_en_5.1.1_3.0_1694587924757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_nli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_nli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_nli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/binwang/bert-base-nli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_stsb_en.md new file mode 100644 index 00000000000000..10976f99b7af41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_nli_stsb BertEmbeddings from binwang +author: John Snow Labs +name: bert_base_nli_stsb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_nli_stsb` is an English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_nli_stsb_en_5.1.1_3.0_1694587761118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_nli_stsb_en_5.1.1_3.0_1694587761118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_nli_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_nli_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_nli_stsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/binwang/bert-base-nli-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_persian_sport_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_persian_sport_bert_uncased_en.md new file mode 100644 index 00000000000000..25f4faa459ab63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_persian_sport_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_persian_sport_bert_uncased BertEmbeddings from montazeri +author: John Snow Labs +name: bert_base_persian_sport_bert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_persian_sport_bert_uncased` is an English model originally trained by montazeri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_persian_sport_bert_uncased_en_5.1.1_3.0_1694564033173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_persian_sport_bert_uncased_en_5.1.1_3.0_1694564033173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_persian_sport_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_persian_sport_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_persian_sport_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.0 MB| + +## References + +https://huggingface.co/montazeri/bert-base-persian-sport-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_peticoes_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_peticoes_pt.md new file mode 100644 index 00000000000000..d641307dc0233e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_peticoes_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_base_portuguese_cased_finetuned_peticoes BertEmbeddings from Luciano +author: John Snow Labs +name: bert_base_portuguese_cased_finetuned_peticoes +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_portuguese_cased_finetuned_peticoes` is a Portuguese model originally trained by Luciano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_peticoes_pt_5.1.1_3.0_1694566319648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_peticoes_pt_5.1.1_3.0_1694566319648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_portuguese_cased_finetuned_peticoes","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_finetuned_peticoes", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_finetuned_peticoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/Luciano/bert-base-portuguese-cased-finetuned-peticoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1790k_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1790k_ar.md new file mode 100644 index 00000000000000..12aa435d15dd47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1790k_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib60_1790k BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib60_1790k +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib60_1790k` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1790k_ar_5.1.1_3.0_1694565184223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1790k_ar_5.1.1_3.0_1694565184223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_qarib60_1790k","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib60_1790k", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib60_1790k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib60_1790k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1970k_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1970k_ar.md new file mode 100644 index 00000000000000..c10604c566e8e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1970k_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib60_1970k BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib60_1970k +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib60_1970k` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1970k_ar_5.1.1_3.0_1694565365666.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1970k_ar_5.1.1_3.0_1694565365666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_qarib60_1970k","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib60_1970k", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib60_1970k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib60_1970k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_860k_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_860k_ar.md new file mode 100644 index 00000000000000..d2ee1a6aae964d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_860k_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib60_860k BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib60_860k +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib60_860k` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_860k_ar_5.1.1_3.0_1694565528171.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_860k_ar_5.1.1_3.0_1694565528171.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_qarib60_860k","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib60_860k", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib60_860k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib60_860k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib_ar.md new file mode 100644 index 00000000000000..0692ef43688be0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib_ar_5.1.1_3.0_1694565010110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib_ar_5.1.1_3.0_1694565010110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_qarib","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.0 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_literature_pro_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_literature_pro_en.md new file mode 100644 index 00000000000000..0cc9b509d799fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_literature_pro_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_spanish_wwm_cased_finetuned_literature_pro BertEmbeddings from a-v-bely +author: John Snow Labs +name: bert_base_spanish_wwm_cased_finetuned_literature_pro +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_spanish_wwm_cased_finetuned_literature_pro` is an English model originally trained by a-v-bely. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_literature_pro_en_5.1.1_3.0_1694586382623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_literature_pro_en_5.1.1_3.0_1694586382623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_finetuned_literature_pro","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_finetuned_literature_pro", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_finetuned_literature_pro| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/a-v-bely/bert-base-spanish-wwm-cased-finetuned-literature-pro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_standard_bahasa_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_standard_bahasa_cased_en.md new file mode 100644 index 00000000000000..9429486248ee7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_standard_bahasa_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_standard_bahasa_cased BertEmbeddings from mesolitica +author: John Snow Labs +name: bert_base_standard_bahasa_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_standard_bahasa_cased` is an English model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_standard_bahasa_cased_en_5.1.1_3.0_1694586245028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_standard_bahasa_cased_en_5.1.1_3.0_1694586245028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_standard_bahasa_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_standard_bahasa_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_standard_bahasa_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/mesolitica/bert-base-standard-bahasa-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_kblab_sv.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_kblab_sv.md new file mode 100644 index 00000000000000..b4368b621b30b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_kblab_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish bert_base_swedish_cased_kblab BertEmbeddings from KBLab +author: John Snow Labs +name: bert_base_swedish_cased_kblab +date: 2023-09-13 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_swedish_cased_kblab` is a Swedish model originally trained by KBLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kblab_sv_5.1.1_3.0_1694563899305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kblab_sv_5.1.1_3.0_1694563899305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_cased_kblab","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_swedish_cased_kblab", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_cased_kblab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|465.2 MB| + +## References + +https://huggingface.co/KBLab/bert-base-swedish-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_1_en.md new file mode 100644 index 00000000000000..5c6c6bdf288220 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_1 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_1` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_1_en_5.1.1_3.0_1694566037618.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_1_en_5.1.1_3.0_1694566037618.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_2_en.md new file mode 100644 index 00000000000000..3cd591e38fdee4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_2 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_2` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_2_en_5.1.1_3.0_1694569462383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_2_en_5.1.1_3.0_1694569462383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_3_en.md new file mode 100644 index 00000000000000..218d2f78c5da77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_3 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_3` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_3_en_5.1.1_3.0_1694569751143.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_3_en_5.1.1_3.0_1694569751143.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_4_en.md new file mode 100644 index 00000000000000..e5f8c8ef0bcff7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_4 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_4` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_4_en_5.1.1_3.0_1694572974212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_4_en_5.1.1_3.0_1694572974212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_5_en.md new file mode 100644 index 00000000000000..4a619767e6925b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_5 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_5` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_5_en_5.1.1_3.0_1694574203077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_5_en_5.1.1_3.0_1694574203077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_6_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_6_en.md new file mode 100644 index 00000000000000..1e515cfb8302f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_6 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_6 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_6` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_6_en_5.1.1_3.0_1694574566491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_6_en_5.1.1_3.0_1694574566491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_en.md new file mode 100644 index 00000000000000..02279fa9c971ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_bert_mask_complete_word BertEmbeddings from keshavG +author: John Snow Labs +name: bert_base_uncased_bert_mask_complete_word +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_bert_mask_complete_word` is an English model originally trained by keshavG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_en_5.1.1_3.0_1694578511284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_en_5.1.1_3.0_1694578511284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_bert_mask_complete_word","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_bert_mask_complete_word", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_bert_mask_complete_word| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/keshavG/bert-base-uncased-bert_mask_complete_word \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_updated_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_updated_vocab_en.md new file mode 100644 index 00000000000000..c5125a4ea23f0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_updated_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_bert_mask_complete_word_updated_vocab BertEmbeddings from keshavG +author: John Snow Labs +name: bert_base_uncased_bert_mask_complete_word_updated_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_bert_mask_complete_word_updated_vocab` is an English model originally trained by keshavG.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_updated_vocab_en_5.1.1_3.0_1694585644846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_updated_vocab_en_5.1.1_3.0_1694585644846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_bert_mask_complete_word_updated_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_bert_mask_complete_word_updated_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_bert_mask_complete_word_updated_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|413.5 MB| + +## References + +https://huggingface.co/keshavG/bert-base-uncased-bert_mask_complete_word_updated_vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_binwang_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_binwang_en.md new file mode 100644 index 00000000000000..4a2802f534a14f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_binwang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_binwang BertEmbeddings from binwang +author: John Snow Labs +name: bert_base_uncased_binwang +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_binwang` is an English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_binwang_en_5.1.1_3.0_1694588138775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_binwang_en_5.1.1_3.0_1694588138775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_binwang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_binwang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_binwang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/binwang/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_contents_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_contents_en.md new file mode 100644 index 00000000000000..134c377b26fe41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_contents_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_contents BertEmbeddings from Contents +author: John Snow Labs +name: bert_base_uncased_contents +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_contents` is an English model originally trained by Contents. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_contents_en_5.1.1_3.0_1694579787819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_contents_en_5.1.1_3.0_1694579787819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_contents","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_contents", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_contents| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Contents/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_dstc9_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_dstc9_en.md new file mode 100644 index 00000000000000..5aa5e23b4e62f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_dstc9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_dstc9 BertEmbeddings from wilsontam +author: John Snow Labs +name: bert_base_uncased_dstc9 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_dstc9` is an English model originally trained by wilsontam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_dstc9_en_5.1.1_3.0_1694585476644.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_dstc9_en_5.1.1_3.0_1694585476644.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_dstc9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_dstc9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_dstc9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/wilsontam/bert-base-uncased-dstc9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bertbero_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bertbero_en.md new file mode 100644 index 00000000000000..a51fd851d73000 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bertbero_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bertbero BertEmbeddings from Transabrar +author: John Snow Labs +name: bert_base_uncased_finetuned_bertbero +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bertbero` is an English model originally trained by Transabrar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bertbero_en_5.1.1_3.0_1694572476667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bertbero_en_5.1.1_3.0_1694572476667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bertbero","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bertbero", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bertbero| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Transabrar/bert-base-uncased-finetuned-bertbero \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_gap_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_gap_en.md new file mode 100644 index 00000000000000..ba346fe26b01d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_gap_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_gap BertEmbeddings from AriyanH +author: John Snow Labs +name: bert_base_uncased_finetuned_gap +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_gap` is an English model originally trained by AriyanH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_gap_en_5.1.1_3.0_1694571134435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_gap_en_5.1.1_3.0_1694571134435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_gap","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_gap", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_gap| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/AriyanH/bert-base-uncased-finetuned-gap \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_rramicus_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_rramicus_en.md new file mode 100644 index 00000000000000..55876135fdce1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_rramicus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_rramicus BertEmbeddings from repro-rights-amicus-briefs +author: John Snow Labs +name: bert_base_uncased_finetuned_rramicus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_rramicus` is an English model originally trained by repro-rights-amicus-briefs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567579152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567579152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_rramicus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_rramicus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_rramicus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/repro-rights-amicus-briefs/bert-base-uncased-finetuned-RRamicus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuning_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuning_en.md new file mode 100644 index 00000000000000..cedd2b66153091 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuning_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuning BertEmbeddings from shimu +author: John Snow Labs +name: bert_base_uncased_finetuning +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuning` is an English model originally trained by shimu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuning_en_5.1.1_3.0_1694573972903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuning_en_5.1.1_3.0_1694573972903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuning","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuning", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuning| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/shimu/bert_base_uncased_finetuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_chrispfield_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_chrispfield_en.md new file mode 100644 index 00000000000000..31f2b3f3ae9c6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_chrispfield_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_chrispfield BertEmbeddings from Chrispfield +author: John Snow Labs +name: bert_base_uncased_issues_128_chrispfield +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_chrispfield` is an English model originally trained by Chrispfield. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_chrispfield_en_5.1.1_3.0_1694567399706.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_chrispfield_en_5.1.1_3.0_1694567399706.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_chrispfield","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_chrispfield", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_chrispfield| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Chrispfield/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_issues_128_kiri1701_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_issues_128_kiri1701_en.md new file mode 100644 index 00000000000000..07ee9e81042a40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_issues_128_kiri1701_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_issues_128_kiri1701 BertEmbeddings from kiri1701 +author: John Snow Labs +name: bert_base_uncased_issues_128_issues_128_kiri1701 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_issues_128_kiri1701` is an English model originally trained by kiri1701.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_kiri1701_en_5.1.1_3.0_1694564477310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_kiri1701_en_5.1.1_3.0_1694564477310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_issues_128_kiri1701","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_issues_128_kiri1701", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_issues_128_kiri1701| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/kiri1701/bert-base-uncased-issues-128-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_munsu_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_munsu_en.md new file mode 100644 index 00000000000000..e1ad51b8371472 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_munsu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_munsu BertEmbeddings from MunSu +author: John Snow Labs +name: bert_base_uncased_issues_128_munsu +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_munsu` is an English model originally trained by MunSu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_munsu_en_5.1.1_3.0_1694586892919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_munsu_en_5.1.1_3.0_1694586892919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_munsu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_munsu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_munsu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/MunSu/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_twidfeel_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_twidfeel_en.md new file mode 100644 index 00000000000000..82bd79398682ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_twidfeel_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_twidfeel BertEmbeddings from twidfeel +author: John Snow Labs +name: bert_base_uncased_issues_128_twidfeel +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_twidfeel` is an English model originally trained by twidfeel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_twidfeel_en_5.1.1_3.0_1694586346305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_twidfeel_en_5.1.1_3.0_1694586346305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_twidfeel","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_twidfeel", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_twidfeel| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/twidfeel/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_lm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_lm_en.md new file mode 100644 index 00000000000000..5a7ec6224cf0d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_lm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_lm BertEmbeddings from iewaij +author: John Snow Labs +name: bert_base_uncased_lm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_lm` is an English model originally trained by iewaij. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_lm_en_5.1.1_3.0_1694578212936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_lm_en_5.1.1_3.0_1694578212936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_lm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_lm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_lm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/iewaij/bert-base-uncased-lm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en.md new file mode 100644 index 00000000000000..ab77562c3516e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine BertEmbeddings from spacemanidol +author: John Snow Labs +name: bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine` is an English model originally trained by spacemanidol.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en_5.1.1_3.0_1694578896728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en_5.1.1_3.0_1694578896728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/spacemanidol/bert-base-uncased-noisy-orcas-1.0positive-0.5-negative-margin1.0-cosine \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_2_en.md new file mode 100644 index 00000000000000..080d928ea56603 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_reviews_2 BertEmbeddings from insaf +author: John Snow Labs +name: bert_base_uncased_reviews_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_reviews_2` is an English model originally trained by insaf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_2_en_5.1.1_3.0_1694580821822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_2_en_5.1.1_3.0_1694580821822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_reviews_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_reviews_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_reviews_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/insaf/bert-base-uncased-reviews-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_3_en.md new file mode 100644 index 00000000000000..bdd2578e56df05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_reviews_3 BertEmbeddings from insaf +author: John Snow Labs +name: bert_base_uncased_reviews_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_reviews_3` is an English model originally trained by insaf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_3_en_5.1.1_3.0_1694581488679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_3_en_5.1.1_3.0_1694581488679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_reviews_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_reviews_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_reviews_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/insaf/bert-base-uncased-reviews-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_rotten_tomatoes_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_rotten_tomatoes_en.md new file mode 100644 index 00000000000000..388ba75ecb9555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_rotten_tomatoes_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_rotten_tomatoes BertEmbeddings from textattack +author: John Snow Labs +name: bert_base_uncased_rotten_tomatoes +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_rotten_tomatoes` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rotten_tomatoes_en_5.1.1_3.0_1694577597481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rotten_tomatoes_en_5.1.1_3.0_1694577597481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_rotten_tomatoes","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_rotten_tomatoes", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_rotten_tomatoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/textattack/bert-base-uncased-rotten_tomatoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_test_en.md new file mode 100644 index 00000000000000..a8382838dcce78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_test BertEmbeddings from Contents +author: John Snow Labs +name: bert_base_uncased_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_test` is an English model originally trained by Contents. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_test_en_5.1.1_3.0_1694584225340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_test_en_5.1.1_3.0_1694584225340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Contents/bert-base-uncased-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_finetuned_portuguese_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_finetuned_portuguese_en.md new file mode 100644 index 00000000000000..f672f2eb5369a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_finetuned_portuguese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_vn_finetuned_portuguese BertEmbeddings from dotansang +author: John Snow Labs +name: bert_base_vn_finetuned_portuguese +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_vn_finetuned_portuguese` is an English model originally trained by dotansang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_vn_finetuned_portuguese_en_5.1.1_3.0_1694582275769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_vn_finetuned_portuguese_en_5.1.1_3.0_1694582275769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_vn_finetuned_portuguese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_vn_finetuned_portuguese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_vn_finetuned_portuguese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|498.8 MB| + +## References + +https://huggingface.co/dotansang/bert-base-vn-finetuned-pt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_yc_recipe_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_yc_recipe_30_en.md new file mode 100644 index 00000000000000..214621443f1dfb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_yc_recipe_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_yc_recipe_30 BertEmbeddings from CennetOguz +author: John Snow Labs +name: bert_base_yc_recipe_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_yc_recipe_30` is an English model originally trained by CennetOguz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_yc_recipe_30_en_5.1.1_3.0_1694568535972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_yc_recipe_30_en_5.1.1_3.0_1694568535972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_yc_recipe_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_yc_recipe_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_yc_recipe_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/CennetOguz/bert_base_yc_recipe_30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_cn_wudi7758521521_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_cn_wudi7758521521_en.md new file mode 100644 index 00000000000000..5761861a3274d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_cn_wudi7758521521_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_cn_wudi7758521521 BertEmbeddings from wudi7758521521 +author: John Snow Labs +name: bert_cn_wudi7758521521 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_cn_wudi7758521521` is an English model originally trained by wudi7758521521. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cn_wudi7758521521_en_5.1.1_3.0_1694585808653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cn_wudi7758521521_en_5.1.1_3.0_1694585808653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_cn_wudi7758521521","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_cn_wudi7758521521", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cn_wudi7758521521| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wudi7758521521/bert_cn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_finetune_simcse_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_finetune_simcse_truncate_en.md new file mode 100644 index 00000000000000..7dfa950dce2240 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_finetune_simcse_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_concat_2_finetune_simcse_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_concat_2_finetune_simcse_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_concat_2_finetune_simcse_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_concat_2_finetune_simcse_truncate_en_5.1.1_3.0_1694588304153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_concat_2_finetune_simcse_truncate_en_5.1.1_3.0_1694588304153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_concat_2_finetune_simcse_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_concat_2_finetune_simcse_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_concat_2_finetune_simcse_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|399.9 MB| + +## References + +https://huggingface.co/NasimB/bert-concat-2-finetune-simcse-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_en.md new file mode 100644 index 00000000000000..84ec7e94261b5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_concat_3 BertEmbeddings from NasimB +author: John Snow Labs +name: bert_concat_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_concat_3` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_concat_3_en_5.1.1_3.0_1694587599955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_concat_3_en_5.1.1_3.0_1694587599955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_concat_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_concat_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_concat_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/NasimB/bert-concat-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_finetune_simcse_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_finetune_simcse_truncate_en.md new file mode 100644 index 00000000000000..40ade906ae5fb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_finetune_simcse_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_concat_3_finetune_simcse_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_concat_3_finetune_simcse_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_concat_3_finetune_simcse_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_concat_3_finetune_simcse_truncate_en_5.1.1_3.0_1694588145224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_concat_3_finetune_simcse_truncate_en_5.1.1_3.0_1694588145224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_concat_3_finetune_simcse_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_concat_3_finetune_simcse_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_concat_3_finetune_simcse_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|401.0 MB| + +## References + +https://huggingface.co/NasimB/bert-concat-3-finetune-simcse-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_csl_gold8k_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_csl_gold8k_en.md new file mode 100644 index 00000000000000..453130fb2c2cb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_csl_gold8k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_csl_gold8k BertEmbeddings from subbareddyiiit +author: John Snow Labs +name: bert_csl_gold8k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_csl_gold8k` is an English model originally trained by subbareddyiiit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_csl_gold8k_en_5.1.1_3.0_1694575569274.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_csl_gold8k_en_5.1.1_3.0_1694575569274.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_csl_gold8k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_csl_gold8k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_csl_gold8k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/subbareddyiiit/bert_csl_gold8k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_dk_laptop_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_dk_laptop_en.md new file mode 100644 index 00000000000000..184b207184d8e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_dk_laptop_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_dk_laptop BertEmbeddings from activebus +author: John Snow Labs +name: bert_dk_laptop +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_dk_laptop` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_dk_laptop_en_5.1.1_3.0_1694575473487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_dk_laptop_en_5.1.1_3.0_1694575473487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_dk_laptop","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_dk_laptop", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_dk_laptop| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.4 MB| + +## References + +https://huggingface.co/activebus/BERT-DK_laptop \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_02_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_02_en.md new file mode 100644 index 00000000000000..6b2382812cd515 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_02_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_embding_finetuned_spmlm_02 BertEmbeddings from ashwathjadhav23 +author: John Snow Labs +name: bert_embding_finetuned_spmlm_02 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_embding_finetuned_spmlm_02` is an English model originally trained by ashwathjadhav23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embding_finetuned_spmlm_02_en_5.1.1_3.0_1694585695765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embding_finetuned_spmlm_02_en_5.1.1_3.0_1694585695765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_embding_finetuned_spmlm_02","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_embding_finetuned_spmlm_02", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embding_finetuned_spmlm_02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/ashwathjadhav23/Bert_Embding_Finetuned_SpMLM_02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetune_simcse_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetune_simcse_truncate_en.md new file mode 100644 index 00000000000000..f911da42bcbdf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetune_simcse_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetune_simcse_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_finetune_simcse_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetune_simcse_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetune_simcse_truncate_en_5.1.1_3.0_1694587925442.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetune_simcse_truncate_en_5.1.1_3.0_1694587925442.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_finetune_simcse_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetune_simcse_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetune_simcse_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/NasimB/bert-finetune-simcse-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test1_en.md new file mode 100644 index 00000000000000..22ee728365f6b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test1 BertEmbeddings from bill +author: John Snow Labs +name: bert_finetuning_test1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test1` is an English model originally trained by bill. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test1_en_5.1.1_3.0_1694587604269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test1_en_5.1.1_3.0_1694587604269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_finetuning_test1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/bill/bert_finetuning_test1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_mine_result_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_mine_result_en.md new file mode 100644 index 00000000000000..5a4c08c126c555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_mine_result_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_mine_result BertEmbeddings from Martinlabla +author: John Snow Labs +name: bert_finetuning_test_mine_result +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_mine_result` is an English model originally trained by Martinlabla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_mine_result_en_5.1.1_3.0_1694567497197.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_mine_result_en_5.1.1_3.0_1694567497197.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_finetuning_test_mine_result","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_mine_result", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_mine_result| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Martinlabla/bert_finetuning_test_mine_result \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_xiejiafang_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_xiejiafang_en.md new file mode 100644 index 00000000000000..1def6ee443a447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_xiejiafang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_xiejiafang BertEmbeddings from xiejiafang +author: John Snow Labs +name: bert_finetuning_test_xiejiafang +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_xiejiafang` is an English model originally trained by xiejiafang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_xiejiafang_en_5.1.1_3.0_1694586563453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_xiejiafang_en_5.1.1_3.0_1694586563453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_finetuning_test_xiejiafang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_xiejiafang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_xiejiafang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/xiejiafang/bert_finetuning_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_big_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_big_en.md new file mode 100644 index 00000000000000..9076731484a34a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_big_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_hinglish_big BertEmbeddings from aditeyabaral +author: John Snow Labs +name: bert_hinglish_big +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_hinglish_big` is an English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_hinglish_big_en_5.1.1_3.0_1694576522327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_hinglish_big_en_5.1.1_3.0_1694576522327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_hinglish_big","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_hinglish_big", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_hinglish_big| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/aditeyabaral/bert-hinglish-big \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_small_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_small_en.md new file mode 100644 index 00000000000000..b6ca09020bdff6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_small_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_hinglish_small BertEmbeddings from aditeyabaral +author: John Snow Labs +name: bert_hinglish_small +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_hinglish_small` is an English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_hinglish_small_en_5.1.1_3.0_1694576634952.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_hinglish_small_en_5.1.1_3.0_1694576634952.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_hinglish_small","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_hinglish_small", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_hinglish_small| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/aditeyabaral/bert-hinglish-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_java_bfp_single_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_java_bfp_single_en.md new file mode 100644 index 00000000000000..b4bb0eeb54081b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_java_bfp_single_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_java_bfp_single BertEmbeddings from up201806461 +author: John Snow Labs +name: bert_java_bfp_single +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_java_bfp_single` is an English model originally trained by up201806461. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_java_bfp_single_en_5.1.1_3.0_1694578962374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_java_bfp_single_en_5.1.1_3.0_1694578962374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_java_bfp_single","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_java_bfp_single", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_java_bfp_single| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/up201806461/bert-java-bfp_single \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv2_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv2_ar.md new file mode 100644 index 00000000000000..f11181a70680fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv2_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_large_arabertv2 BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_large_arabertv2 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_arabertv2` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_arabertv2_ar_5.1.1_3.0_1694584366192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_arabertv2_ar_5.1.1_3.0_1694584366192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_arabertv2","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_arabertv2", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|1.4 GB| + +## References + +https://huggingface.co/aubmindlab/bert-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_cased_da_20_en.md new file mode 100644 index 00000000000000..cf89ae83f44e9e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_cased_da_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_cased_da_20_en_5.1.1_3.0_1694564770508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_cased_da_20_en_5.1.1_3.0_1694564770508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_1_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_1_cased_da_20_en.md new file mode 100644 index 00000000000000..742afa69b26391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_1_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_low20_1_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_low20_1_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_low20_1_cased_da_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_1_cased_da_20_en_5.1.1_3.0_1694565382077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_1_cased_da_20_en_5.1.1_3.0_1694565382077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_low20_1_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_low20_1_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_low20_1_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-low20-1-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_cased_da_20_en.md new file mode 100644 index 00000000000000..5628db32798146 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_low20_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_low20_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_low20_cased_da_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_cased_da_20_en_5.1.1_3.0_1694565075875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_cased_da_20_en_5.1.1_3.0_1694565075875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_low20_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_low20_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_low20_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-low20-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_20_en.md new file mode 100644 index 00000000000000..bf2d0cdadfa73c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr100_0_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr100_0_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr100_0_cased_da_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_0_cased_da_20_en_5.1.1_3.0_1694566867523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_0_cased_da_20_en_5.1.1_3.0_1694566867523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr100_0_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr100_0_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr100_0_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR100-0-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_2_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_2_cased_da_20_en.md new file mode 100644 index 00000000000000..9dc2a676cb3740 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_2_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr100_2_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr100_2_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr100_2_cased_da_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_2_cased_da_20_en_5.1.1_3.0_1694567487345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_2_cased_da_20_en_5.1.1_3.0_1694567487345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr100_2_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr100_2_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr100_2_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR100-2-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_3_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_3_cased_da_20_en.md new file mode 100644 index 00000000000000..c5885ec396d33b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_3_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr100_3_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr100_3_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr100_3_cased_da_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_3_cased_da_20_en_5.1.1_3.0_1694568354875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_3_cased_da_20_en_5.1.1_3.0_1694568354875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr100_3_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr100_3_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr100_3_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR100-3-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr10_0_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr10_0_cased_da_20_en.md new file mode 100644 index 00000000000000..6b434478dd0b6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr10_0_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr10_0_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr10_0_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr10_0_cased_da_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr10_0_cased_da_20_en_5.1.1_3.0_1694566558054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr10_0_cased_da_20_en_5.1.1_3.0_1694566558054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr10_0_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr10_0_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr10_0_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR10-0-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_pt.md new file mode 100644 index 00000000000000..c79af0651c7296 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_large_cased_portuguese_lenerbr BertEmbeddings from pierreguillou +author: John Snow Labs +name: bert_large_cased_portuguese_lenerbr +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_portuguese_lenerbr` is a Portuguese model originally trained by pierreguillou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563642184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563642184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_portuguese_lenerbr","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_portuguese_lenerbr", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_portuguese_lenerbr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/pierreguillou/bert-large-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_full_norwegian_label_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_full_norwegian_label_20_en.md new file mode 100644 index 00000000000000..fc968c1493760b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_full_norwegian_label_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_full_norwegian_label_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_full_norwegian_label_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_full_norwegian_label_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_full_norwegian_label_20_en_5.1.1_3.0_1694571475257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_full_norwegian_label_20_en_5.1.1_3.0_1694571475257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_full_norwegian_label_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_full_norwegian_label_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_full_norwegian_label_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-full-no-label-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_cased_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_cased_20_en.md new file mode 100644 index 00000000000000..2a6aeb41314249 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_cased_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_lr100_0_cased_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_lr100_0_cased_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_lr100_0_cased_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_0_cased_20_en_5.1.1_3.0_1694569625563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_0_cased_20_en_5.1.1_3.0_1694569625563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_lr100_0_cased_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_lr100_0_cased_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_lr100_0_cased_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-LR100-0-cased-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_150_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_150_en.md new file mode 100644 index 00000000000000..6d946f77f19d95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_lr100_1_cased_150 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_lr100_1_cased_150 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_lr100_1_cased_150` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_1_cased_150_en_5.1.1_3.0_1694569943900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_1_cased_150_en_5.1.1_3.0_1694569943900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_lr100_1_cased_150","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_lr100_1_cased_150", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_lr100_1_cased_150| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-LR100-1-cased-150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_en.md new file mode 100644 index 00000000000000..32011553115ddb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_en_5.1.1_3.0_1694571918630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_en_5.1.1_3.0_1694571918630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en.md new file mode 100644 index 00000000000000..ff2b3585327705 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en_5.1.1_3.0_1694572892853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en_5.1.1_3.0_1694572892853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-20-sigir-tune2nd-LR10-labelled-30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en.md new file mode 100644 index 00000000000000..c8a51f528afb3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en_5.1.1_3.0_1694573387926.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en_5.1.1_3.0_1694573387926.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-40-sigir-tune2nd-LR100-labelled-30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en.md new file mode 100644 index 00000000000000..42baa44b0f5b1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en_5.1.1_3.0_1694575697199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en_5.1.1_3.0_1694575697199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR100-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en.md new file mode 100644 index 00000000000000..8d51641eafa082 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en_5.1.1_3.0_1694587179778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en_5.1.1_3.0_1694587179778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-20-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en.md new file mode 100644 index 00000000000000..63c70aa648c253 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en_5.1.1_3.0_1694587498641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en_5.1.1_3.0_1694587498641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-20-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en.md new file mode 100644 index 00000000000000..aa4440ab6b4e39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en_5.1.1_3.0_1694583363515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en_5.1.1_3.0_1694583363515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-40-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en.md new file mode 100644 index 00000000000000..1f3705914e7257 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en_5.1.1_3.0_1694584252445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en_5.1.1_3.0_1694584252445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-40-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en.md new file mode 100644 index 00000000000000..10d26077bb674b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en_5.1.1_3.0_1694569334144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en_5.1.1_3.0_1694569334144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en.md new file mode 100644 index 00000000000000..d6f9c2bf77517f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en_5.1.1_3.0_1694570314564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en_5.1.1_3.0_1694570314564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-23 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en.md new file mode 100644 index 00000000000000..aaedbbe999655e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en_5.1.1_3.0_1694570981130.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en_5.1.1_3.0_1694570981130.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-25 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en.md new file mode 100644 index 00000000000000..49f1ac33dd7388 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en_5.1.1_3.0_1694574046482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en_5.1.1_3.0_1694574046482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en.md new file mode 100644 index 00000000000000..56968d95fd37a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en_5.1.1_3.0_1694574358005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en_5.1.1_3.0_1694574358005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en.md new file mode 100644 index 00000000000000..85029202afe2be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en_5.1.1_3.0_1694574703123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en_5.1.1_3.0_1694574703123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en.md new file mode 100644 index 00000000000000..3c74ff9fe82398 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en_5.1.1_3.0_1694575328626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en_5.1.1_3.0_1694575328626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en.md new file mode 100644 index 00000000000000..fbfc1cf4982f05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en_5.1.1_3.0_1694575626182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en_5.1.1_3.0_1694575626182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-17 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en.md new file mode 100644 index 00000000000000..78a13e0d225837 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en_5.1.1_3.0_1694572150639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en_5.1.1_3.0_1694572150639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en.md new file mode 100644 index 00000000000000..86b598b7a2c880 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en_5.1.1_3.0_1694573104931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en_5.1.1_3.0_1694573104931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_en.md new file mode 100644 index 00000000000000..11a6c25a169b43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_en_5.1.1_3.0_1694575070771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_en_5.1.1_3.0_1694575070771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_contrastive_self_supervised_acl2020_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_contrastive_self_supervised_acl2020_en.md new file mode 100644 index 00000000000000..09d9d0127b41ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_contrastive_self_supervised_acl2020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_contrastive_self_supervised_acl2020 BertEmbeddings from sap-ai-research +author: John Snow Labs +name: bert_large_contrastive_self_supervised_acl2020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_contrastive_self_supervised_acl2020` is an English model originally trained by sap-ai-research. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_contrastive_self_supervised_acl2020_en_5.1.1_3.0_1694571008191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_contrastive_self_supervised_acl2020_en_5.1.1_3.0_1694571008191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_contrastive_self_supervised_acl2020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_contrastive_self_supervised_acl2020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_contrastive_self_supervised_acl2020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/sap-ai-research/BERT-Large-Contrastive-Self-Supervised-ACL2020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_150_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_150_en.md new file mode 100644 index 00000000000000..ea5af26fb79536 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_swedish_nordic_pile_150 BertEmbeddings from timpal0l +author: John Snow Labs +name: bert_large_swedish_nordic_pile_150 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_swedish_nordic_pile_150` is an English model originally trained by timpal0l. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_swedish_nordic_pile_150_en_5.1.1_3.0_1694579612852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_swedish_nordic_pile_150_en_5.1.1_3.0_1694579612852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_swedish_nordic_pile_150","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_swedish_nordic_pile_150", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_swedish_nordic_pile_150| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/timpal0l/bert_large_sv_nordic_pile_150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_uncased_en.md new file mode 100644 index 00000000000000..234d4adf88b1ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_swedish_uncased BertEmbeddings from af-ai-center +author: John Snow Labs +name: bert_large_swedish_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_swedish_uncased` is an English model originally trained by af-ai-center. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_swedish_uncased_en_5.1.1_3.0_1694577225450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_swedish_uncased_en_5.1.1_3.0_1694577225450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_swedish_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_swedish_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_swedish_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/af-ai-center/bert-large-swedish-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en.md new file mode 100644 index 00000000000000..94d90204495b8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_lowr100_4_uncased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_lowr100_4_uncased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_lowr100_4_uncased_da_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en_5.1.1_3.0_1694568669304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en_5.1.1_3.0_1694568669304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_lowr100_4_uncased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_lowr100_4_uncased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_lowr100_4_uncased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-lowR100-4-uncased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en.md new file mode 100644 index 00000000000000..290ff49ccce50e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_lowr100_5_uncased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_lowr100_5_uncased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_lowr100_5_uncased_da_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en_5.1.1_3.0_1694568976431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en_5.1.1_3.0_1694568976431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_lowr100_5_uncased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_lowr100_5_uncased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_lowr100_5_uncased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-lowR100-5-uncased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_yc_recipe_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_yc_recipe_30_en.md new file mode 100644 index 00000000000000..4a7e7a1eef328a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_yc_recipe_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_yc_recipe_30 BertEmbeddings from CennetOguz +author: John Snow Labs +name: bert_large_yc_recipe_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_yc_recipe_30` is an English model originally trained by CennetOguz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_yc_recipe_30_en_5.1.1_3.0_1694568821432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_yc_recipe_30_en_5.1.1_3.0_1694568821432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_yc_recipe_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_yc_recipe_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_yc_recipe_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/CennetOguz/bert_large_yc_recipe_30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_mini_arabic_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_mini_arabic_ar.md new file mode 100644 index 00000000000000..0754572efb6391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_mini_arabic_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_mini_arabic BertEmbeddings from asafaya +author: John Snow Labs +name: bert_mini_arabic +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_arabic` is an Arabic model originally trained by asafaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_arabic_ar_5.1.1_3.0_1694582500015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_arabic_ar_5.1.1_3.0_1694582500015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_mini_arabic","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_mini_arabic", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|43.3 MB| + +## References + +https://huggingface.co/asafaya/bert-mini-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_model_nyashavision22_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_model_nyashavision22_en.md new file mode 100644 index 00000000000000..3c902faad2fe87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_model_nyashavision22_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_model_nyashavision22 BertEmbeddings from NyashaVision22 +author: John Snow Labs +name: bert_model_nyashavision22 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_model_nyashavision22` is an English model originally trained by NyashaVision22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_model_nyashavision22_en_5.1.1_3.0_1694576589229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_model_nyashavision22_en_5.1.1_3.0_1694576589229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_model_nyashavision22","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_model_nyashavision22", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_model_nyashavision22| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/NyashaVision22/bert_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_nlp_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_nlp_en.md new file mode 100644 index 00000000000000..39e3cba70f042e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_nlp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_nlp BertEmbeddings from subbareddyiiit +author: John Snow Labs +name: bert_nlp +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_nlp` is an English model originally trained by subbareddyiiit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_nlp_en_5.1.1_3.0_1694575401616.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_nlp_en_5.1.1_3.0_1694575401616.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_nlp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_nlp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_nlp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/subbareddyiiit/BERT-NLP \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_inf_corpus_v.1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_inf_corpus_v.1_en.md new file mode 100644 index 00000000000000..2ece59f5ffa5d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_inf_corpus_v.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_portuguese_inf_corpus_v.1 BertEmbeddings from ricardo-filho +author: John Snow Labs +name: bert_portuguese_inf_corpus_v.1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_inf_corpus_v.1` is an English model originally trained by ricardo-filho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_inf_corpus_v.1_en_5.1.1_3.0_1694568024545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_inf_corpus_v.1_en_5.1.1_3.0_1694568024545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_portuguese_inf_corpus_v.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_portuguese_inf_corpus_v.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_inf_corpus_v.1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ricardo-filho/BERT-pt-inf-corpus-v.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_corpus_v.1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_corpus_v.1_en.md new file mode 100644 index 00000000000000..85eb5332731eeb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_corpus_v.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_portuguese_institutional_corpus_v.1 BertEmbeddings from ricardo-filho +author: John Snow Labs +name: bert_portuguese_institutional_corpus_v.1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_institutional_corpus_v.1` is an English model originally trained by ricardo-filho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_corpus_v.1_en_5.1.1_3.0_1694568183725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_corpus_v.1_en_5.1.1_3.0_1694568183725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_portuguese_institutional_corpus_v.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_portuguese_institutional_corpus_v.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_institutional_corpus_v.1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ricardo-filho/BERT-pt-institutional-corpus-v.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_en.md new file mode 100644 index 00000000000000..7b1160dc5c975a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_portuguese_institutional BertEmbeddings from ricardo-filho +author: John Snow Labs +name: bert_portuguese_institutional +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_institutional` is an English model originally trained by ricardo-filho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_en_5.1.1_3.0_1694568312532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_en_5.1.1_3.0_1694568312532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_portuguese_institutional","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_portuguese_institutional", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_institutional| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ricardo-filho/BERT-pt-institutional \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pt_laptop_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_laptop_en.md new file mode 100644 index 00000000000000..0d8598c932a835 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_laptop_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pt_laptop BertEmbeddings from activebus +author: John Snow Labs +name: bert_pt_laptop +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pt_laptop` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pt_laptop_en_5.1.1_3.0_1694575945954.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pt_laptop_en_5.1.1_3.0_1694575945954.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pt_laptop","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pt_laptop", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pt_laptop| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.4 MB| + +## References + +https://huggingface.co/activebus/BERT-PT_laptop \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pt_rest_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_rest_en.md new file mode 100644 index 00000000000000..e5a75c271c4afc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_rest_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pt_rest BertEmbeddings from activebus +author: John Snow Labs +name: bert_pt_rest +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pt_rest` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pt_rest_en_5.1.1_3.0_1694576170608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pt_rest_en_5.1.1_3.0_1694576170608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_pt_rest","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pt_rest", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pt_rest| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.4 MB| + +## References + +https://huggingface.co/activebus/BERT-PT_rest \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_cord19_en.md new file mode 100644 index 00000000000000..0daf7b1a83b9d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_cord19 BertEmbeddings from NeuML +author: John Snow Labs +name: bert_small_cord19 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_cord19` is an English model originally trained by NeuML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_cord19_en_5.1.1_3.0_1694569266803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_cord19_en_5.1.1_3.0_1694569266803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|130.5 MB| + +## References + +https://huggingface.co/NeuML/bert-small-cord19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_en.md new file mode 100644 index 00000000000000..d0fdcade4480da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finer_en_5.1.1_3.0_1694578634265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finer_en_5.1.1_3.0_1694578634265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_finer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_longer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_longer_en.md new file mode 100644 index 00000000000000..597ad6f196b6b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_longer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finer_longer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finer_longer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finer_longer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finer_longer_en_5.1.1_3.0_1694578725327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finer_longer_en_5.1.1_3.0_1694578725327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_finer_longer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finer_longer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finer_longer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finer-longer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_longer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_longer_en.md new file mode 100644 index 00000000000000..f93de11cb3d516 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_longer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_eurlex_longer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_eurlex_longer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_eurlex_longer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_eurlex_longer_en_5.1.1_3.0_1694569982036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_eurlex_longer_en_5.1.1_3.0_1694569982036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_finetuned_eurlex_longer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_eurlex_longer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_eurlex_longer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-eurlex-longer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_en.md new file mode 100644 index 00000000000000..0370444f921f6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_finer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_finer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_finer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_en_5.1.1_3.0_1694577758798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_en_5.1.1_3.0_1694577758798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_finetuned_finer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_finer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_finer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-finer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_longer10_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_longer10_en.md new file mode 100644 index 00000000000000..5bc0cf5e011fac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_longer10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_finer_longer10 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_finer_longer10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_finer_longer10` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_longer10_en_5.1.1_3.0_1694577859721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_longer10_en_5.1.1_3.0_1694577859721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_finetuned_finer_longer10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_finer_longer10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_finer_longer10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-finer-longer10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_nan_labels_500_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_nan_labels_500_en.md new file mode 100644 index 00000000000000..b6ca774d2c0695 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_nan_labels_500_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_nan_labels_500 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_nan_labels_500 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_nan_labels_500` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_nan_labels_500_en_5.1.1_3.0_1694574048493.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_nan_labels_500_en_5.1.1_3.0_1694574048493.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_nan_labels_500","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_nan_labels_500", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_nan_labels_500| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-nan-labels-500 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_pretrained_on_squad_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_pretrained_on_squad_en.md new file mode 100644 index 00000000000000..9c78abf0796c12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_pretrained_on_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_pretrained_on_squad BertEmbeddings from anas-awadalla +author: John Snow Labs +name: bert_small_pretrained_on_squad +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_pretrained_on_squad` is an English model originally trained by anas-awadalla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_pretrained_on_squad_en_5.1.1_3.0_1694579945463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_pretrained_on_squad_en_5.1.1_3.0_1694579945463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_small_pretrained_on_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_pretrained_on_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_pretrained_on_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/anas-awadalla/bert-small-pretrained-on-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_legal_definitions_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_legal_definitions_en.md new file mode 100644 index 00000000000000..e30e903e9dfbb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_legal_definitions_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tiny_finetuned_legal_definitions BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_tiny_finetuned_legal_definitions +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_legal_definitions` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_legal_definitions_en_5.1.1_3.0_1694564433910.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_legal_definitions_en_5.1.1_3.0_1694564433910.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_tiny_finetuned_legal_definitions","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tiny_finetuned_legal_definitions", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_legal_definitions| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/bert-tiny-finetuned-legal-definitions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en.md new file mode 100644 index 00000000000000..f0ff6a6c7c3782 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tiny_finetuned_nan_labels_nepal_bhasa_longer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_tiny_finetuned_nan_labels_nepal_bhasa_longer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_nan_labels_nepal_bhasa_longer` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en_5.1.1_3.0_1694565410527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en_5.1.1_3.0_1694565410527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_tiny_finetuned_nan_labels_nepal_bhasa_longer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tiny_finetuned_nan_labels_nepal_bhasa_longer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_nan_labels_nepal_bhasa_longer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/bert-tiny-finetuned-nan-labels-new-longer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_truncate_en.md new file mode 100644 index 00000000000000..ab00b088985163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_truncate_en_5.1.1_3.0_1694587157781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_truncate_en_5.1.1_3.0_1694587157781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|400.3 MB| + +## References + +https://huggingface.co/NasimB/bert-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_ucb_5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_ucb_5_en.md new file mode 100644 index 00000000000000..0f4f477fd701a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_ucb_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_ucb_5 BertEmbeddings from Diegomejia +author: John Snow Labs +name: bert_ucb_5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ucb_5` is an English model originally trained by Diegomejia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ucb_5_en_5.1.1_3.0_1694570353048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ucb_5_en_5.1.1_3.0_1694570353048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_ucb_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_ucb_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ucb_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Diegomejia/bert-ucb-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_10_h_512_a_8_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_10_h_512_a_8_cord19_200616_en.md new file mode 100644 index 00000000000000..5fac0d66ef244d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_10_h_512_a_8_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_10_h_512_a_8_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_10_h_512_a_8_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_10_h_512_a_8_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_10_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580662292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_10_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580662292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_10_h_512_a_8_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_10_h_512_a_8_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_10_h_512_a_8_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|177.4 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en.md new file mode 100644 index 00000000000000..5faa852e2c8546 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq BertEmbeddings from postbot +author: John Snow Labs +name: bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq` is an English model originally trained by postbot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en_5.1.1_3.0_1694575813152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en_5.1.1_3.0_1694575813152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|35.9 MB| + +## References + +https://huggingface.co/postbot/bert_uncased_L-2_H-256_A-4-mlm-multi-emails-hq \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_512_a_8_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_512_a_8_cord19_200616_en.md new file mode 100644 index 00000000000000..2ab2f5f66731e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_512_a_8_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_2_h_512_a_8_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_2_h_512_a_8_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_2_h_512_a_8_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580817170.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580817170.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_2_h_512_a_8_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_2_h_512_a_8_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_2_h_512_a_8_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|83.3 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-2_H-512_A-8_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_512_a_8_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_512_a_8_cord19_200616_en.md new file mode 100644 index 00000000000000..0efa00cf262be3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_512_a_8_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_4_h_512_a_8_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_4_h_512_a_8_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_4_h_512_a_8_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580980693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580980693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_4_h_512_a_8_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_4_h_512_a_8_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_4_h_512_a_8_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-4_H-512_A-8_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_6_h_128_a_2_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_6_h_128_a_2_cord19_200616_en.md new file mode 100644 index 00000000000000..6045aee085b47f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_6_h_128_a_2_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_6_h_128_a_2_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_6_h_128_a_2_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_6_h_128_a_2_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_6_h_128_a_2_cord19_200616_en_5.1.1_3.0_1694581174056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_6_h_128_a_2_cord19_200616_en_5.1.1_3.0_1694581174056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_6_h_128_a_2_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_6_h_128_a_2_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_6_h_128_a_2_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.6 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-6_H-128_A-2_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_2xthicc_multi_emails_hq_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_2xthicc_multi_emails_hq_en.md new file mode 100644 index 00000000000000..01094daa40f30b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_2xthicc_multi_emails_hq_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_tiny_2xthicc_multi_emails_hq BertEmbeddings from postbot +author: John Snow Labs +name: bert_uncased_tiny_2xthicc_multi_emails_hq +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_tiny_2xthicc_multi_emails_hq` is an English model originally trained by postbot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_2xthicc_multi_emails_hq_en_5.1.1_3.0_1694575965085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_2xthicc_multi_emails_hq_en_5.1.1_3.0_1694575965085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_tiny_2xthicc_multi_emails_hq","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_tiny_2xthicc_multi_emails_hq", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_tiny_2xthicc_multi_emails_hq| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|18.1 MB| + +## References + +https://huggingface.co/postbot/bert_uncased_tiny_2xthicc-multi-emails-hq \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_multi_emails_hq_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_multi_emails_hq_en.md new file mode 100644 index 00000000000000..1bc9cd4fed9901 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_multi_emails_hq_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_tiny_multi_emails_hq BertEmbeddings from postbot +author: John Snow Labs +name: bert_uncased_tiny_multi_emails_hq +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_tiny_multi_emails_hq` is an English model originally trained by postbot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_multi_emails_hq_en_5.1.1_3.0_1694575894615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_multi_emails_hq_en_5.1.1_3.0_1694575894615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_tiny_multi_emails_hq","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_tiny_multi_emails_hq", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_tiny_multi_emails_hq| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/postbot/bert_uncased_tiny-multi-emails-hq \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_wwm_words_law_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_wwm_words_law_en.md new file mode 100644 index 00000000000000..b6d165d851dc51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_wwm_words_law_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_wwm_words_law BertEmbeddings from ssbuild +author: John Snow Labs +name: bert_wwm_words_law +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_wwm_words_law` is an English model originally trained by ssbuild. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_wwm_words_law_en_5.1.1_3.0_1694583048594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_wwm_words_law_en_5.1.1_3.0_1694583048594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_wwm_words_law","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_wwm_words_law", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_wwm_words_law| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|424.2 MB| + +## References + +https://huggingface.co/ssbuild/bert_wwm_words_law \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_en.md new file mode 100644 index 00000000000000..20781554b69aa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_yelp BertEmbeddings from spicecloud +author: John Snow Labs +name: bert_yelp +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_yelp` is an English model originally trained by spicecloud. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_yelp_en_5.1.1_3.0_1694578158390.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_yelp_en_5.1.1_3.0_1694578158390.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_yelp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_yelp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_yelp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/spicecloud/bert-yelp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_local_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_local_en.md new file mode 100644 index 00000000000000..34590cb2b70605 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_local_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_yelp_local BertEmbeddings from spicecloud +author: John Snow Labs +name: bert_yelp_local +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_yelp_local` is an English model originally trained by spicecloud. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_yelp_local_en_5.1.1_3.0_1694578003743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_yelp_local_en_5.1.1_3.0_1694578003743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_yelp_local","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_yelp_local", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_yelp_local| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/spicecloud/bert-yelp-local \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertimbau_base_finetuned_lener_breton_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bertimbau_base_finetuned_lener_breton_pt.md new file mode 100644 index 00000000000000..a5e45323acaf3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertimbau_base_finetuned_lener_breton_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertimbau_base_finetuned_lener_breton BertEmbeddings from Luciano +author: John Snow Labs +name: bertimbau_base_finetuned_lener_breton +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertimbau_base_finetuned_lener_breton` is a Portuguese model originally trained by Luciano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_base_finetuned_lener_breton_pt_5.1.1_3.0_1694580858415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_base_finetuned_lener_breton_pt_5.1.1_3.0_1694580858415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertimbau_base_finetuned_lener_breton","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertimbau_base_finetuned_lener_breton", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau_base_finetuned_lener_breton| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/Luciano/bertimbau-base-finetuned-lener-br \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly128_en.md b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly128_en.md new file mode 100644 index 00000000000000..cb61fc45eb9617 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly128 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly128` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly128_en_5.1.1_3.0_1694563422405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly128_en_5.1.1_3.0_1694563422405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertjewdialdataallqonly128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biblitbert_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-biblitbert_1_en.md new file mode 100644 index 00000000000000..8824d2a4111687 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biblitbert_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biblitbert_1 BertEmbeddings from vppvgit +author: John Snow Labs +name: biblitbert_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biblitbert_1` is an English model originally trained by vppvgit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biblitbert_1_en_5.1.1_3.0_1694582201983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biblitbert_1_en_5.1.1_3.0_1694582201983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("biblitbert_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biblitbert_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biblitbert_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.7 MB| + +## References + +https://huggingface.co/vppvgit/BiblItBERT-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_minialbert_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_minialbert_128_en.md new file mode 100644 index 00000000000000..735a049c1999b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_minialbert_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_minialbert_128 BertEmbeddings from nlpie +author: John Snow Labs +name: bio_minialbert_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_minialbert_128` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_minialbert_128_en_5.1.1_3.0_1694568055117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_minialbert_128_en_5.1.1_3.0_1694568055117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bio_minialbert_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_minialbert_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_minialbert_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.6 MB| + +## References + +https://huggingface.co/nlpie/bio-miniALBERT-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_tinybert_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_tinybert_en.md new file mode 100644 index 00000000000000..a25dbbbc345763 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_tinybert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_tinybert BertEmbeddings from nlpie +author: John Snow Labs +name: bio_tinybert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_tinybert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_tinybert_en_5.1.1_3.0_1694577599069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_tinybert_en_5.1.1_3.0_1694577599069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bio_tinybert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_tinybert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_tinybert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|53.8 MB| + +## References + +https://huggingface.co/nlpie/bio-tinybert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobert_base_1.2_en.md b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_1.2_en.md new file mode 100644 index 00000000000000..37f3e529a1179b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_1.2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_base_1.2 BertEmbeddings from abnuel +author: John Snow Labs +name: biobert_base_1.2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_base_1.2` is an English model originally trained by abnuel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_base_1.2_en_5.1.1_3.0_1694573075905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_base_1.2_en_5.1.1_3.0_1694573075905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("biobert_base_1.2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_base_1.2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_base_1.2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/abnuel/biobert-base_1.2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobertpt_all_pt.md b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_all_pt.md new file mode 100644 index 00000000000000..4c949a0e8a3ce2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_all_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese biobertpt_all BertEmbeddings from pucpr +author: John Snow Labs +name: biobertpt_all +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobertpt_all` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobertpt_all_pt_5.1.1_3.0_1694564346908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobertpt_all_pt_5.1.1_3.0_1694564346908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("biobertpt_all","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobertpt_all", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobertpt_all| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|664.8 MB| + +## References + +https://huggingface.co/pucpr/biobertpt-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobertpt_bio_pt.md b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_bio_pt.md new file mode 100644 index 00000000000000..a99ea40f5089cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_bio_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese biobertpt_bio BertEmbeddings from pucpr +author: John Snow Labs +name: biobertpt_bio +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobertpt_bio` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobertpt_bio_pt_5.1.1_3.0_1694564511348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobertpt_bio_pt_5.1.1_3.0_1694564511348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("biobertpt_bio","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobertpt_bio", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobertpt_bio| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|665.0 MB| + +## References + +https://huggingface.co/pucpr/biobertpt-bio \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobertpt_clin_pt.md b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_clin_pt.md new file mode 100644 index 00000000000000..f1500d423fab4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_clin_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese biobertpt_clin BertEmbeddings from pucpr +author: John Snow Labs +name: biobertpt_clin +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobertpt_clin` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobertpt_clin_pt_5.1.1_3.0_1694564736648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobertpt_clin_pt_5.1.1_3.0_1694564736648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("biobertpt_clin","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobertpt_clin", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobertpt_clin| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|665.0 MB| + +## References + +https://huggingface.co/pucpr/biobertpt-clin \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bioformer_16l_en.md b/docs/_posts/ahmedlone127/2023-09-13-bioformer_16l_en.md new file mode 100644 index 00000000000000..fadb7c01fd2fbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bioformer_16l_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioformer_16l BertEmbeddings from bioformers +author: John Snow Labs +name: bioformer_16l +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bioformer_16l` is an English model originally trained by bioformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioformer_16l_en_5.1.1_3.0_1694566235721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioformer_16l_en_5.1.1_3.0_1694566235721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bioformer_16l","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bioformer_16l", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioformer_16l| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|155.3 MB| + +## References + +https://huggingface.co/bioformers/bioformer-16L \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biomedical_en.md b/docs/_posts/ahmedlone127/2023-09-13-biomedical_en.md new file mode 100644 index 00000000000000..b913be3cc32eba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biomedical_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biomedical BertEmbeddings from ajitrajasekharan +author: John Snow Labs +name: biomedical +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomedical` is an English model originally trained by ajitrajasekharan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedical_en_5.1.1_3.0_1694577740537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedical_en_5.1.1_3.0_1694577740537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("biomedical","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biomedical", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedical| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/ajitrajasekharan/biomedical \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bioptimus_en.md b/docs/_posts/ahmedlone127/2023-09-13-bioptimus_en.md new file mode 100644 index 00000000000000..9338e76033d79b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bioptimus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioptimus BertEmbeddings from rttl-ai +author: John Snow Labs +name: bioptimus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bioptimus` is an English model originally trained by rttl-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioptimus_en_5.1.1_3.0_1694587762594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioptimus_en_5.1.1_3.0_1694587762594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bioptimus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bioptimus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioptimus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/rttl-ai/BIOptimus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_eli5_mlm_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_eli5_mlm_model_en.md new file mode 100644 index 00000000000000..5bab73d8dc62fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_eli5_mlm_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_awesome_eli5_mlm_model BertEmbeddings from JackWolfard +author: John Snow Labs +name: burmese_awesome_eli5_mlm_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_awesome_eli5_mlm_model` is an English model originally trained by JackWolfard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_eli5_mlm_model_en_5.1.1_3.0_1694583256576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_eli5_mlm_model_en_5.1.1_3.0_1694583256576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("burmese_awesome_eli5_mlm_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_awesome_eli5_mlm_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_eli5_mlm_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|523.9 KB| + +## References + +https://huggingface.co/JackWolfard/my_awesome_eli5_mlm_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_model_alexyalunin_en.md b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_model_alexyalunin_en.md new file mode 100644 index 00000000000000..7a02b6b052b48a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_model_alexyalunin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_awesome_model_alexyalunin BertEmbeddings from alexyalunin +author: John Snow Labs +name: burmese_awesome_model_alexyalunin +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_awesome_model_alexyalunin` is an English model originally trained by alexyalunin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_alexyalunin_en_5.1.1_3.0_1694578520145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_alexyalunin_en_5.1.1_3.0_1694578520145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("burmese_awesome_model_alexyalunin","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_awesome_model_alexyalunin", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_alexyalunin| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/alexyalunin/my-awesome-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chefberto_italian_cased_it.md b/docs/_posts/ahmedlone127/2023-09-13-chefberto_italian_cased_it.md new file mode 100644 index 00000000000000..89b3dbd0bf80a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chefberto_italian_cased_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian chefberto_italian_cased BertEmbeddings from vinhood +author: John Snow Labs +name: chefberto_italian_cased +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chefberto_italian_cased` is an Italian model originally trained by vinhood. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chefberto_italian_cased_it_5.1.1_3.0_1694581754685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chefberto_italian_cased_it_5.1.1_3.0_1694581754685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chefberto_italian_cased","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chefberto_italian_cased", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chefberto_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|412.6 MB| + +## References + +https://huggingface.co/vinhood/chefberto-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_en.md new file mode 100644 index 00000000000000..8fcb986dd8d999 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English chemical_bert_uncased BertEmbeddings from recobo +author: John Snow Labs +name: chemical_bert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chemical_bert_uncased` is an English model originally trained by recobo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_en_5.1.1_3.0_1694566856839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_en_5.1.1_3.0_1694566856839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chemical_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chemical_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemical_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/recobo/chemical-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chupeto_en.md b/docs/_posts/ahmedlone127/2023-09-13-chupeto_en.md new file mode 100644 index 00000000000000..c3acf86a8e73d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chupeto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English chupeto BertEmbeddings from justinian336 +author: John Snow Labs +name: chupeto +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chupeto` is an English model originally trained by justinian336. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chupeto_en_5.1.1_3.0_1694577833102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chupeto_en_5.1.1_3.0_1694577833102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chupeto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chupeto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chupeto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/justinian336/chupeto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clinical_minialbert_312_en.md b/docs/_posts/ahmedlone127/2023-09-13-clinical_minialbert_312_en.md new file mode 100644 index 00000000000000..33b21c2e528c93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clinical_minialbert_312_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_minialbert_312 BertEmbeddings from nlpie +author: John Snow Labs +name: clinical_minialbert_312 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_minialbert_312` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_minialbert_312_en_5.1.1_3.0_1694574036589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_minialbert_312_en_5.1.1_3.0_1694574036589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinical_minialbert_312","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinical_minialbert_312", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_minialbert_312| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.6 MB| + +## References + +https://huggingface.co/nlpie/clinical-miniALBERT-312 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_128_en.md new file mode 100644 index 00000000000000..9a61fb8060b94f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_pubmed_bert_base_128 BertEmbeddings from Tsubasaz +author: John Snow Labs +name: clinical_pubmed_bert_base_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_pubmed_bert_base_128` is an English model originally trained by Tsubasaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_128_en_5.1.1_3.0_1694573024229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_128_en_5.1.1_3.0_1694573024229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinical_pubmed_bert_base_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinical_pubmed_bert_base_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_pubmed_bert_base_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/Tsubasaz/clinical-pubmed-bert-base-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_512_en.md b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_512_en.md new file mode 100644 index 00000000000000..0240fd62528afd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_512_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_pubmed_bert_base_512 BertEmbeddings from Tsubasaz +author: John Snow Labs +name: clinical_pubmed_bert_base_512 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_pubmed_bert_base_512` is an English model originally trained by Tsubasaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_512_en_5.1.1_3.0_1694573181218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_512_en_5.1.1_3.0_1694573181218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinical_pubmed_bert_base_512","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinical_pubmed_bert_base_512", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_pubmed_bert_base_512| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/Tsubasaz/clinical-pubmed-bert-base-512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_base_uncased_en.md new file mode 100644 index 00000000000000..3837138d40d8e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clr_finetuned_bert_base_uncased BertEmbeddings from SauravMaheshkar +author: John Snow Labs +name: clr_finetuned_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clr_finetuned_bert_base_uncased` is an English model originally trained by SauravMaheshkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_base_uncased_en_5.1.1_3.0_1694570611508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_base_uncased_en_5.1.1_3.0_1694570611508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clr_finetuned_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clr_finetuned_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clr_finetuned_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/SauravMaheshkar/clr-finetuned-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_large_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_large_uncased_en.md new file mode 100644 index 00000000000000..065b6c5b7f9ed8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_large_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clr_finetuned_bert_large_uncased BertEmbeddings from SauravMaheshkar +author: John Snow Labs +name: clr_finetuned_bert_large_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clr_finetuned_bert_large_uncased` is an English model originally trained by SauravMaheshkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_large_uncased_en_5.1.1_3.0_1694570948330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_large_uncased_en_5.1.1_3.0_1694570948330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clr_finetuned_bert_large_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clr_finetuned_bert_large_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clr_finetuned_bert_large_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/SauravMaheshkar/clr-finetuned-bert-large-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clr_pretrained_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-clr_pretrained_bert_base_uncased_en.md new file mode 100644 index 00000000000000..a95a1ffb937be3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clr_pretrained_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clr_pretrained_bert_base_uncased BertEmbeddings from SauravMaheshkar +author: John Snow Labs +name: clr_pretrained_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clr_pretrained_bert_base_uncased` is an English model originally trained by SauravMaheshkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clr_pretrained_bert_base_uncased_en_5.1.1_3.0_1694571112395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clr_pretrained_bert_base_uncased_en_5.1.1_3.0_1694571112395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clr_pretrained_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clr_pretrained_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clr_pretrained_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/SauravMaheshkar/clr-pretrained-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-compact_biobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-compact_biobert_en.md new file mode 100644 index 00000000000000..144968fab29a3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-compact_biobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English compact_biobert BertEmbeddings from nlpie +author: John Snow Labs +name: compact_biobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `compact_biobert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/compact_biobert_en_5.1.1_3.0_1694574999951.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/compact_biobert_en_5.1.1_3.0_1694574999951.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("compact_biobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("compact_biobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|compact_biobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.4 MB| + +## References + +https://huggingface.co/nlpie/compact-biobert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-condenser_en.md b/docs/_posts/ahmedlone127/2023-09-13-condenser_en.md new file mode 100644 index 00000000000000..7f1dac0527eb84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-condenser_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English condenser BertEmbeddings from Luyu +author: John Snow Labs +name: condenser +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `condenser` is an English model originally trained by Luyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/condenser_en_5.1.1_3.0_1694567000935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/condenser_en_5.1.1_3.0_1694567000935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("condenser","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("condenser", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|condenser| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/Luyu/condenser \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-continue_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-continue_mlm_en.md new file mode 100644 index 00000000000000..fc267d90feb3fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-continue_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English continue_mlm BertEmbeddings from researchaccount +author: John Snow Labs +name: continue_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `continue_mlm` is an English model originally trained by researchaccount. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/continue_mlm_en_5.1.1_3.0_1694567886435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/continue_mlm_en_5.1.1_3.0_1694567886435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("continue_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("continue_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|continue_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|608.4 MB| + +## References + +https://huggingface.co/researchaccount/continue_mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-contractbr_bert_base_portuguese_en.md b/docs/_posts/ahmedlone127/2023-09-13-contractbr_bert_base_portuguese_en.md new file mode 100644 index 00000000000000..7755c4473c57b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-contractbr_bert_base_portuguese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English contractbr_bert_base_portuguese BertEmbeddings from gacosta +author: John Snow Labs +name: contractbr_bert_base_portuguese +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `contractbr_bert_base_portuguese` is an English model originally trained by gacosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/contractbr_bert_base_portuguese_en_5.1.1_3.0_1694582459489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/contractbr_bert_base_portuguese_en_5.1.1_3.0_1694582459489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("contractbr_bert_base_portuguese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("contractbr_bert_base_portuguese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|contractbr_bert_base_portuguese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/gacosta/contractbr-bert-base-portuguese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cordbert_1000_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-cordbert_1000_v1_en.md new file mode 100644 index 00000000000000..77c0c28060c6d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cordbert_1000_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cordbert_1000_v1 BertEmbeddings from tanvir21 +author: John Snow Labs +name: cordbert_1000_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cordbert_1000_v1` is an English model originally trained by tanvir21. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cordbert_1000_v1_en_5.1.1_3.0_1694574593447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cordbert_1000_v1_en_5.1.1_3.0_1694574593447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cordbert_1000_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cordbert_1000_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cordbert_1000_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/tanvir21/cordBERT-1000-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_marco_en.md b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_marco_en.md new file mode 100644 index 00000000000000..c37cd0b2fc911c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_marco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English corsican_condenser_marco BertEmbeddings from Luyu +author: John Snow Labs +name: corsican_condenser_marco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `corsican_condenser_marco` is an English model originally trained by Luyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/corsican_condenser_marco_en_5.1.1_3.0_1694566669130.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/corsican_condenser_marco_en_5.1.1_3.0_1694566669130.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("corsican_condenser_marco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("corsican_condenser_marco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|corsican_condenser_marco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/Luyu/co-condenser-marco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_wiki_en.md b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_wiki_en.md new file mode 100644 index 00000000000000..1f65c52df534b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_wiki_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English corsican_condenser_wiki BertEmbeddings from Luyu +author: John Snow Labs +name: corsican_condenser_wiki +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `corsican_condenser_wiki` is an English model originally trained by Luyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/corsican_condenser_wiki_en_5.1.1_3.0_1694566835743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/corsican_condenser_wiki_en_5.1.1_3.0_1694566835743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("corsican_condenser_wiki","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("corsican_condenser_wiki", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|corsican_condenser_wiki| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/Luyu/co-condenser-wiki \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-czert_b_base_cased_cs.md b/docs/_posts/ahmedlone127/2023-09-13-czert_b_base_cased_cs.md new file mode 100644 index 00000000000000..75cb46fa7c6a55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-czert_b_base_cased_cs.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Czech czert_b_base_cased BertEmbeddings from UWB-AIR +author: John Snow Labs +name: czert_b_base_cased +date: 2023-09-13 +tags: [bert, cs, open_source, fill_mask, onnx] +task: Embeddings +language: cs +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `czert_b_base_cased` is a Czech model originally trained by UWB-AIR. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/czert_b_base_cased_cs_5.1.1_3.0_1694574332325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/czert_b_base_cased_cs_5.1.1_3.0_1694574332325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("czert_b_base_cased","cs") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("czert_b_base_cased", "cs") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|czert_b_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|cs| +|Size:|408.3 MB| + +## References + +https://huggingface.co/UWB-AIR/Czert-B-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_en.md new file mode 100644 index 00000000000000..adfd1bdd103345 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_en_5.1.1_3.0_1694572069064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_en_5.1.1_3.0_1694572069064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_life_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_life_test_en.md new file mode 100644 index 00000000000000..d17418592ff471 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_life_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_life_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_life_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_life_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_life_test_en_5.1.1_3.0_1694570882292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_life_test_en_5.1.1_3.0_1694570882292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_life_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_life_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_life_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_life_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_rec_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_rec_test_en.md new file mode 100644 index 00000000000000..99c897c087e9d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_rec_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_rec_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_rec_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_rec_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_rec_test_en_5.1.1_3.0_1694571547494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_rec_test_en_5.1.1_3.0_1694571547494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_rec_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_rec_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_rec_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_rec_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_sci_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_sci_test_en.md new file mode 100644 index 00000000000000..3010fc78c3ea32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_sci_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_sci_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_sci_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_sci_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_sci_test_en_5.1.1_3.0_1694571388033.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_sci_test_en_5.1.1_3.0_1694571388033.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_sci_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_sci_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_sci_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_sci_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_tech_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_tech_test_en.md new file mode 100644 index 00000000000000..71ea0695d2c733 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_tech_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_tech_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_tech_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_tech_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_tech_test_en_5.1.1_3.0_1694571198236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_tech_test_en_5.1.1_3.0_1694571198236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_tech_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_tech_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_tech_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_tech_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_write_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_write_test_en.md new file mode 100644 index 00000000000000..1d6ad632de2181 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_write_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_write_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_write_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_write_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_write_test_en_5.1.1_3.0_1694571044394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_write_test_en_5.1.1_3.0_1694571044394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_write_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_write_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_write_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_write_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_trec_covid_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_trec_covid_en.md new file mode 100644 index 00000000000000..956dfdf9c5a66f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_trec_covid_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_trec_covid BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_trec_covid +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_trec_covid` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_trec_covid_en_5.1.1_3.0_1694571695715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_trec_covid_en_5.1.1_3.0_1694571695715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_trec_covid","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_trec_covid", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_trec_covid| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-trec_covid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dapt_bert_ko.md b/docs/_posts/ahmedlone127/2023-09-13-dapt_bert_ko.md new file mode 100644 index 00000000000000..9a282a35eb3c46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dapt_bert_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean dapt_bert BertEmbeddings from Kdogs +author: John Snow Labs +name: dapt_bert +date: 2023-09-13 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dapt_bert` is a Korean model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dapt_bert_ko_5.1.1_3.0_1694576688190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dapt_bert_ko_5.1.1_3.0_1694576688190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dapt_bert","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dapt_bert", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dapt_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Kdogs/dapt_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-darijabert_arabizi_ar.md b/docs/_posts/ahmedlone127/2023-09-13-darijabert_arabizi_ar.md new file mode 100644 index 00000000000000..48a8755bfb50a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-darijabert_arabizi_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic darijabert_arabizi BertEmbeddings from SI2M-Lab +author: John Snow Labs +name: darijabert_arabizi +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `darijabert_arabizi` is an Arabic model originally trained by SI2M-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/darijabert_arabizi_ar_5.1.1_3.0_1694564181705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/darijabert_arabizi_ar_5.1.1_3.0_1694564181705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("darijabert_arabizi","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("darijabert_arabizi", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|darijabert_arabizi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|634.9 MB| + +## References + +https://huggingface.co/SI2M-Lab/DarijaBERT-arabizi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dbbert_el.md b/docs/_posts/ahmedlone127/2023-09-13-dbbert_el.md new file mode 100644 index 00000000000000..64b4def3a1c89d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dbbert_el.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Modern Greek (1453-) dbbert BertEmbeddings from colinswaelens +author: John Snow Labs +name: dbbert +date: 2023-09-13 +tags: [bert, el, open_source, fill_mask, onnx] +task: Embeddings +language: el +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dbbert` is a Modern Greek (1453-) model originally trained by colinswaelens. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbbert_el_5.1.1_3.0_1694581792538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbbert_el_5.1.1_3.0_1694581792538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dbbert","el") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dbbert", "el") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|408.3 MB| + +## References + +https://huggingface.co/colinswaelens/DBBErt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dictbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-dictbert_en.md new file mode 100644 index 00000000000000..7fd09a811d9552 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dictbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dictbert BertEmbeddings from wyu1 +author: John Snow Labs +name: dictbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dictbert` is an English model originally trained by wyu1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dictbert_en_5.1.1_3.0_1694574371852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dictbert_en_5.1.1_3.0_1694574371852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dictbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dictbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dictbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wyu1/DictBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distil_biobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-distil_biobert_en.md new file mode 100644 index 00000000000000..e0a3a57abe0059 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distil_biobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distil_biobert BertEmbeddings from nlpie +author: John Snow Labs +name: distil_biobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distil_biobert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_biobert_en_5.1.1_3.0_1694574845011.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_biobert_en_5.1.1_3.0_1694574845011.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("distil_biobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("distil_biobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_biobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.4 MB| + +## References + +https://huggingface.co/nlpie/distil-biobert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-domain_adapted_contriever_en.md b/docs/_posts/ahmedlone127/2023-09-13-domain_adapted_contriever_en.md new file mode 100644 index 00000000000000..d1e06eeaea4573 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-domain_adapted_contriever_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English domain_adapted_contriever BertEmbeddings from secilozksen +author: John Snow Labs +name: domain_adapted_contriever +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `domain_adapted_contriever` is an English model originally trained by secilozksen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/domain_adapted_contriever_en_5.1.1_3.0_1694567650087.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/domain_adapted_contriever_en_5.1.1_3.0_1694567650087.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("domain_adapted_contriever","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("domain_adapted_contriever", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|domain_adapted_contriever| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/secilozksen/domain-adapted-contriever \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_passage_encoder_viquiquad_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_passage_encoder_viquiquad_base_en.md new file mode 100644 index 00000000000000..b53404b799c571 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_passage_encoder_viquiquad_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_catalan_passage_encoder_viquiquad_base BertEmbeddings from Koslav +author: John Snow Labs +name: dpr_catalan_passage_encoder_viquiquad_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_catalan_passage_encoder_viquiquad_base` is an English model originally trained by Koslav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_catalan_passage_encoder_viquiquad_base_en_5.1.1_3.0_1694580713399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_catalan_passage_encoder_viquiquad_base_en_5.1.1_3.0_1694580713399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_catalan_passage_encoder_viquiquad_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_catalan_passage_encoder_viquiquad_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_catalan_passage_encoder_viquiquad_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Koslav/dpr-catalan-passage_encoder-viquiquad-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_question_encoder_viquiquad_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_question_encoder_viquiquad_base_en.md new file mode 100644 index 00000000000000..546745425754ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_question_encoder_viquiquad_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_catalan_question_encoder_viquiquad_base BertEmbeddings from Koslav +author: John Snow Labs +name: dpr_catalan_question_encoder_viquiquad_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_catalan_question_encoder_viquiquad_base` is an English model originally trained by Koslav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_catalan_question_encoder_viquiquad_base_en_5.1.1_3.0_1694579879042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_catalan_question_encoder_viquiquad_base_en_5.1.1_3.0_1694579879042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_catalan_question_encoder_viquiquad_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_catalan_question_encoder_viquiquad_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_catalan_question_encoder_viquiquad_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Koslav/dpr-catalan-question_encoder-viquiquad-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_context_encoder_en.md b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_context_encoder_en.md new file mode 100644 index 00000000000000..1b1ccc964427bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_context_encoder_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dragon_plus_context_encoder BertEmbeddings from facebook +author: John Snow Labs +name: dragon_plus_context_encoder +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dragon_plus_context_encoder` is an English model originally trained by facebook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dragon_plus_context_encoder_en_5.1.1_3.0_1694576995913.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dragon_plus_context_encoder_en_5.1.1_3.0_1694576995913.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dragon_plus_context_encoder","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dragon_plus_context_encoder", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dragon_plus_context_encoder| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/facebook/dragon-plus-context-encoder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_query_encoder_en.md b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_query_encoder_en.md new file mode 100644 index 00000000000000..fb4aea1535cc65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_query_encoder_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dragon_plus_query_encoder BertEmbeddings from facebook +author: John Snow Labs +name: dragon_plus_query_encoder +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dragon_plus_query_encoder` is an English model originally trained by facebook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dragon_plus_query_encoder_en_5.1.1_3.0_1694576871066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dragon_plus_query_encoder_en_5.1.1_3.0_1694576871066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dragon_plus_query_encoder","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dragon_plus_query_encoder", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dragon_plus_query_encoder| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/facebook/dragon-plus-query-encoder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_model_aripo99_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_aripo99_en.md new file mode 100644 index 00000000000000..520d13d7a60b50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_aripo99_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_aripo99 BertEmbeddings from aripo99 +author: John Snow Labs +name: dummy_model_aripo99 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_aripo99` is an English model originally trained by aripo99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_aripo99_en_5.1.1_3.0_1694581314642.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_aripo99_en_5.1.1_3.0_1694581314642.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_aripo99","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_aripo99", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_aripo99| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/aripo99/dummy_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_1_en.md new file mode 100644 index 00000000000000..6c27cd0264995b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_prac_model_1 BertEmbeddings from fayez94 +author: John Snow Labs +name: dummy_prac_model_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_prac_model_1` is an English model originally trained by fayez94. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_prac_model_1_en_5.1.1_3.0_1694564301534.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_prac_model_1_en_5.1.1_3.0_1694564301534.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_prac_model_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_prac_model_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_prac_model_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/fayez94/dummy_prac_model_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_en.md new file mode 100644 index 00000000000000..85213af1c02129 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_prac_model BertEmbeddings from fayez94 +author: John Snow Labs +name: dummy_prac_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_prac_model` is an English model originally trained by fayez94. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_prac_model_en_5.1.1_3.0_1694564149966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_prac_model_en_5.1.1_3.0_1694564149966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_prac_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_prac_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_prac_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/fayez94/dummy_prac_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dziribert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-dziribert_ar.md new file mode 100644 index 00000000000000..8b9d37ffceb42d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dziribert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic dziribert BertEmbeddings from alger-ia +author: John Snow Labs +name: dziribert +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dziribert` is an Arabic model originally trained by alger-ia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dziribert_ar_5.1.1_3.0_1694578688485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dziribert_ar_5.1.1_3.0_1694578688485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dziribert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dziribert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dziribert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|462.5 MB| + +## References + +https://huggingface.co/alger-ia/dziribert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-e4a_covid_bert_base_romanian_cased_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-e4a_covid_bert_base_romanian_cased_v1_en.md new file mode 100644 index 00000000000000..e993f7f2b6522b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-e4a_covid_bert_base_romanian_cased_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English e4a_covid_bert_base_romanian_cased_v1 BertEmbeddings from racai +author: John Snow Labs +name: e4a_covid_bert_base_romanian_cased_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e4a_covid_bert_base_romanian_cased_v1` is an English model originally trained by racai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e4a_covid_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580152465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e4a_covid_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580152465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("e4a_covid_bert_base_romanian_cased_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("e4a_covid_bert_base_romanian_cased_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e4a_covid_bert_base_romanian_cased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.0 MB| + +## References + +https://huggingface.co/racai/e4a-covid-bert-base-romanian-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-e4a_permits_bert_base_romanian_cased_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-e4a_permits_bert_base_romanian_cased_v1_en.md new file mode 100644 index 00000000000000..3560d137a409ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-e4a_permits_bert_base_romanian_cased_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English e4a_permits_bert_base_romanian_cased_v1 BertEmbeddings from racai +author: John Snow Labs +name: e4a_permits_bert_base_romanian_cased_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e4a_permits_bert_base_romanian_cased_v1` is an English model originally trained by racai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e4a_permits_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580307122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e4a_permits_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580307122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("e4a_permits_bert_base_romanian_cased_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("e4a_permits_bert_base_romanian_cased_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e4a_permits_bert_base_romanian_cased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.0 MB| + +## References + +https://huggingface.co/racai/e4a-permits-bert-base-romanian-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ecomm_sbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-ecomm_sbert_en.md new file mode 100644 index 00000000000000..b8cb1125cb7fad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ecomm_sbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ecomm_sbert BertEmbeddings from Maunish +author: John Snow Labs +name: ecomm_sbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ecomm_sbert` is an English model originally trained by Maunish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ecomm_sbert_en_5.1.1_3.0_1694567607524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ecomm_sbert_en_5.1.1_3.0_1694567607524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ecomm_sbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ecomm_sbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ecomm_sbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|84.6 MB| + +## References + +https://huggingface.co/Maunish/ecomm-sbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-energybert_en.md b/docs/_posts/ahmedlone127/2023-09-13-energybert_en.md new file mode 100644 index 00000000000000..2c3804620bf30c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-energybert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English energybert BertEmbeddings from UNSW-MasterAI +author: John Snow Labs +name: energybert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `energybert` is an English model originally trained by UNSW-MasterAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/energybert_en_5.1.1_3.0_1694577639489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/energybert_en_5.1.1_3.0_1694577639489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("energybert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("energybert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|energybert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/UNSW-MasterAI/EnergyBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ern3_en.md b/docs/_posts/ahmedlone127/2023-09-13-ern3_en.md new file mode 100644 index 00000000000000..d3a2d264656ffe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ern3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ern3 BertEmbeddings from linyi +author: John Snow Labs +name: ern3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ern3` is an English model originally trained by linyi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ern3_en_5.1.1_3.0_1694582155435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ern3_en_5.1.1_3.0_1694582155435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ern3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ern3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ern3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|440.8 MB| + +## References + +https://huggingface.co/linyi/ern3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_alllang_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_alllang_bert_base_uncased_en.md new file mode 100644 index 00000000000000..dc0421d1284e58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_alllang_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English esci_mlm_alllang_bert_base_uncased BertEmbeddings from spacemanidol +author: John Snow Labs +name: esci_mlm_alllang_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `esci_mlm_alllang_bert_base_uncased` is an English model originally trained by spacemanidol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esci_mlm_alllang_bert_base_uncased_en_5.1.1_3.0_1694576764282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esci_mlm_alllang_bert_base_uncased_en_5.1.1_3.0_1694576764282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("esci_mlm_alllang_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("esci_mlm_alllang_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esci_mlm_alllang_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/spacemanidol/esci-mlm-alllang-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-estbert_512_et.md b/docs/_posts/ahmedlone127/2023-09-13-estbert_512_et.md new file mode 100644 index 00000000000000..58c6c881cb2ace --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-estbert_512_et.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Estonian estbert_512 BertEmbeddings from tartuNLP +author: John Snow Labs +name: estbert_512 +date: 2023-09-13 +tags: [bert, et, open_source, fill_mask, onnx] +task: Embeddings +language: et +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `estbert_512` is an Estonian model originally trained by tartuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/estbert_512_et_5.1.1_3.0_1694576625464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/estbert_512_et_5.1.1_3.0_1694576625464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("estbert_512","et") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("estbert_512", "et") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|estbert_512| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|et| +|Size:|463.4 MB| + +## References + +https://huggingface.co/tartuNLP/EstBERT_512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-estbert_et.md b/docs/_posts/ahmedlone127/2023-09-13-estbert_et.md new file mode 100644 index 00000000000000..170d8aafd0938f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-estbert_et.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Estonian estbert BertEmbeddings from tartuNLP +author: John Snow Labs +name: estbert +date: 2023-09-13 +tags: [bert, et, open_source, fill_mask, onnx] +task: Embeddings +language: et +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `estbert` is an Estonian model originally trained by tartuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/estbert_et_5.1.1_3.0_1694576444603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/estbert_et_5.1.1_3.0_1694576444603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("estbert","et") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("estbert", "et") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|estbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|et| +|Size:|463.4 MB| + +## References + +https://huggingface.co/tartuNLP/EstBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-evalconvqa_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-evalconvqa_bert_en.md new file mode 100644 index 00000000000000..57fc7a4aea9d94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-evalconvqa_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English evalconvqa_bert BertEmbeddings from princeton-nlp +author: John Snow Labs +name: evalconvqa_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `evalconvqa_bert` is an English model originally trained by princeton-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/evalconvqa_bert_en_5.1.1_3.0_1694565341212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/evalconvqa_bert_en_5.1.1_3.0_1694565341212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("evalconvqa_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("evalconvqa_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|evalconvqa_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/princeton-nlp/EvalConvQA_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-financialbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-financialbert_en.md new file mode 100644 index 00000000000000..edd59bfd4bb5af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-financialbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English financialbert BertEmbeddings from ahmedrachid +author: John Snow Labs +name: financialbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `financialbert` is an English model originally trained by ahmedrachid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/financialbert_en_5.1.1_3.0_1694577402809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/financialbert_en_5.1.1_3.0_1694577402809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("financialbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("financialbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|financialbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/ahmedrachid/FinancialBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-first_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-first_model_en.md new file mode 100644 index 00000000000000..b22012612473b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-first_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English first_model BertEmbeddings from songqian +author: John Snow Labs +name: first_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `first_model` is an English model originally trained by songqian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_model_en_5.1.1_3.0_1694575037544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_model_en_5.1.1_3.0_1694575037544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("first_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("first_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/songqian/first_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_original_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_original_10_en.md new file mode 100644 index 00000000000000..fbad8893e69a78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_original_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_original_10 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_original_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_original_10` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_original_10_en_5.1.1_3.0_1694588052671.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_original_10_en_5.1.1_3.0_1694588052671.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_original_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_original_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_original_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_original_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-german_bert_base_german_cased_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-german_bert_base_german_cased_finetuned_en.md new file mode 100644 index 00000000000000..8f88cccaac2903 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-german_bert_base_german_cased_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English german_bert_base_german_cased_finetuned BertEmbeddings from rodrigotuna +author: John Snow Labs +name: german_bert_base_german_cased_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_bert_base_german_cased_finetuned` is an English model originally trained by rodrigotuna. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_bert_base_german_cased_finetuned_en_5.1.1_3.0_1694565721535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_bert_base_german_cased_finetuned_en_5.1.1_3.0_1694565721535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("german_bert_base_german_cased_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("german_bert_base_german_cased_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_bert_base_german_cased_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/rodrigotuna/de-bert-base-german-cased-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-german_medbert_de.md b/docs/_posts/ahmedlone127/2023-09-13-german_medbert_de.md new file mode 100644 index 00000000000000..2613b3e2831b95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-german_medbert_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German german_medbert BertEmbeddings from smanjil +author: John Snow Labs +name: german_medbert +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`german_medbert` is a German model originally trained by smanjil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_medbert_de_5.1.1_3.0_1694574616438.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_medbert_de_5.1.1_3.0_1694574616438.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("german_medbert","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("german_medbert", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_medbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/smanjil/German-MedBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-gujibert_fan_en.md b/docs/_posts/ahmedlone127/2023-09-13-gujibert_fan_en.md new file mode 100644 index 00000000000000..974ed93ec27adc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-gujibert_fan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gujibert_fan BertEmbeddings from hsc748NLP +author: John Snow Labs +name: gujibert_fan +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gujibert_fan` is an English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gujibert_fan_en_5.1.1_3.0_1694564628936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gujibert_fan_en_5.1.1_3.0_1694564628936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gujibert_fan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gujibert_fan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gujibert_fan| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.2 MB| + +## References + +https://huggingface.co/hsc748NLP/GujiBERT_fan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hebert_en.md b/docs/_posts/ahmedlone127/2023-09-13-hebert_en.md new file mode 100644 index 00000000000000..c005513098f274 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hebert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hebert BertEmbeddings from avichr +author: John Snow Labs +name: hebert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hebert` is an English model originally trained by avichr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hebert_en_5.1.1_3.0_1694584681748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hebert_en_5.1.1_3.0_1694584681748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hebert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hebert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hebert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/avichr/heBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_en.md new file mode 100644 index 00000000000000..562516ed5fe44a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hindi_bert BertEmbeddings from sukritin +author: John Snow Labs +name: hindi_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_bert` is an English model originally trained by sukritin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_en_5.1.1_3.0_1694575763223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_en_5.1.1_3.0_1694575763223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|609.2 MB| + +## References + +https://huggingface.co/sukritin/hindi-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_scratch_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_scratch_hi.md new file mode 100644 index 00000000000000..a6f3495e42142e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_scratch_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_bert_scratch +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_bert_scratch` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_scratch_hi_5.1.1_3.0_1694579566926.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_scratch_hi_5.1.1_3.0_1694579566926.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_bert_scratch","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_bert_scratch", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|470.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v1_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v1_hi.md new file mode 100644 index 00000000000000..3389947f94522e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v1_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_bert_v1 BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_bert_v1 +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_bert_v1` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_v1_hi_5.1.1_3.0_1694575771399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_v1_hi_5.1.1_3.0_1694575771399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_bert_v1","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_bert_v1", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|663.8 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-bert-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_least_haitian_1m_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_least_haitian_1m_hi.md new file mode 100644 index 00000000000000..da1c809c5ea558 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_least_haitian_1m_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_least_haitian_1m BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_least_haitian_1m +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_least_haitian_1m` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_least_haitian_1m_hi_5.1.1_3.0_1694584509062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_least_haitian_1m_hi_5.1.1_3.0_1694584509062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_least_haitian_1m","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_least_haitian_1m", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_least_haitian_1m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hi-least-ht-1m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_hi.md new file mode 100644 index 00000000000000..9e3e31dd3ed22f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_marathi_dev_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_marathi_dev_bert +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_marathi_dev_bert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_hi_5.1.1_3.0_1694576361222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_hi_5.1.1_3.0_1694576361222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_marathi_dev_bert","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_marathi_dev_bert", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_marathi_dev_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-marathi-dev-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_scratch_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_scratch_hi.md new file mode 100644 index 00000000000000..05db5ed8b72c67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_scratch_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_marathi_dev_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_marathi_dev_bert_scratch +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_marathi_dev_bert_scratch` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_scratch_hi_5.1.1_3.0_1694579727628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_scratch_hi_5.1.1_3.0_1694579727628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_marathi_dev_bert_scratch","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_marathi_dev_bert_scratch", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_marathi_dev_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|608.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-marathi-dev-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hateful_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hateful_hi.md new file mode 100644 index 00000000000000..da69bfdddb0cb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hateful_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_tweets_bert_hateful BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_tweets_bert_hateful +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_tweets_bert_hateful` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hateful_hi_5.1.1_3.0_1694584250203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hateful_hi_5.1.1_3.0_1694584250203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_tweets_bert_hateful","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_tweets_bert_hateful", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_tweets_bert_hateful| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert-hateful \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hi.md new file mode 100644 index 00000000000000..e059fc94d31bb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_tweets_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_tweets_bert +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_tweets_bert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hi_5.1.1_3.0_1694581273510.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hi_5.1.1_3.0_1694581273510.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("hindi_tweets_bert","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_tweets_bert", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_tweets_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-improvedabg_20_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_20_epochs_en.md new file mode 100644 index 00000000000000..0b353761ff19b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_20_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English improvedabg_20_epochs BertEmbeddings from Embible +author: John Snow Labs +name: improvedabg_20_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`improvedabg_20_epochs` is a English model originally trained by Embible. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improvedabg_20_epochs_en_5.1.1_3.0_1694563461257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improvedabg_20_epochs_en_5.1.1_3.0_1694563461257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("improvedabg_20_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("improvedabg_20_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improvedabg_20_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|689.6 MB| + +## References + +https://huggingface.co/Embible/improvedABG-20-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-improvedabg_50_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_50_epochs_en.md new file mode 100644 index 00000000000000..7198fd3847eb15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_50_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English improvedabg_50_epochs BertEmbeddings from Embible +author: John Snow Labs +name: improvedabg_50_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`improvedabg_50_epochs` is a English model originally trained by Embible. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improvedabg_50_epochs_en_5.1.1_3.0_1694563658734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improvedabg_50_epochs_en_5.1.1_3.0_1694563658734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("improvedabg_50_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("improvedabg_50_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improvedabg_50_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|689.4 MB| + +## References + +https://huggingface.co/Embible/improvedABG-50-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_en.md new file mode 100644 index 00000000000000..e589b3dc1280a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inlegalbert BertEmbeddings from law-ai +author: John Snow Labs +name: inlegalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inlegalbert` is a English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inlegalbert_en_5.1.1_3.0_1694582770345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inlegalbert_en_5.1.1_3.0_1694582770345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("inlegalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("inlegalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inlegalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/law-ai/InLegalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_imdb_jv.md b/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_imdb_jv.md new file mode 100644 index 00000000000000..498e4e60dc789f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_imdb_jv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Javanese javanese_bert_small_imdb BertEmbeddings from w11wo +author: John Snow Labs +name: javanese_bert_small_imdb +date: 2023-09-13 +tags: [bert, jv, open_source, fill_mask, onnx] +task: Embeddings +language: jv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`javanese_bert_small_imdb` is a Javanese model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/javanese_bert_small_imdb_jv_5.1.1_3.0_1694582477865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/javanese_bert_small_imdb_jv_5.1.1_3.0_1694582477865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("javanese_bert_small_imdb","jv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("javanese_bert_small_imdb", "jv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|javanese_bert_small_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|jv| +|Size:|407.3 MB| + +## References + +https://huggingface.co/w11wo/javanese-bert-small-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kazakhbertmulti_kk.md b/docs/_posts/ahmedlone127/2023-09-13-kazakhbertmulti_kk.md new file mode 100644 index 00000000000000..d19355b01557e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kazakhbertmulti_kk.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Kazakh kazakhbertmulti BertEmbeddings from amandyk +author: John Snow Labs +name: kazakhbertmulti +date: 2023-09-13 +tags: [bert, kk, open_source, fill_mask, onnx] +task: Embeddings +language: kk +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kazakhbertmulti` is a Kazakh model originally trained by amandyk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kazakhbertmulti_kk_5.1.1_3.0_1694567001040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kazakhbertmulti_kk_5.1.1_3.0_1694567001040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kazakhbertmulti","kk") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kazakhbertmulti", "kk") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kazakhbertmulti| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|kk| +|Size:|609.9 MB| + +## References + +https://huggingface.co/amandyk/KazakhBERTmulti \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_dev_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_dev_en.md new file mode 100644 index 00000000000000..e8fc285327efa2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_dev_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_base_dev BertEmbeddings from beomi +author: John Snow Labs +name: kcbert_base_dev +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kcbert_base_dev` is a English model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_base_dev_en_5.1.1_3.0_1694586571113.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_base_dev_en_5.1.1_3.0_1694586571113.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_base_dev","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_base_dev", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_base_dev| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/beomi/kcbert-base-dev \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_petition_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_petition_en.md new file mode 100644 index 00000000000000..e3dfe66d543d72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_petition_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_base_petition BertEmbeddings from Kyoungmin +author: John Snow Labs +name: kcbert_base_petition +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kcbert_base_petition` is a English model originally trained by Kyoungmin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_base_petition_en_5.1.1_3.0_1694565357198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_base_petition_en_5.1.1_3.0_1694565357198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_base_petition","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_base_petition", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_base_petition| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/Kyoungmin/kcbert-base-petition \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_dev_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_dev_en.md new file mode 100644 index 00000000000000..3231fe67c06f5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_dev_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_large_dev BertEmbeddings from beomi +author: John Snow Labs +name: kcbert_large_dev +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kcbert_large_dev` is a English model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_large_dev_en_5.1.1_3.0_1694587013172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_large_dev_en_5.1.1_3.0_1694587013172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_large_dev","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_large_dev", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_large_dev| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/beomi/kcbert-large-dev \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_en.md new file mode 100644 index 00000000000000..4750d48d788aef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_large BertEmbeddings from beomi +author: John Snow Labs +name: kcbert_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kcbert_large` is a English model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_large_en_5.1.1_3.0_1694587353127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_large_en_5.1.1_3.0_1694587353127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/beomi/kcbert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_mlm_finetune_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_mlm_finetune_en.md new file mode 100644 index 00000000000000..ab9f10ea0b1bb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_mlm_finetune_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_mlm_finetune BertEmbeddings from stresscaptor +author: John Snow Labs +name: kcbert_mlm_finetune +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kcbert_mlm_finetune` is a English model originally trained by stresscaptor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_mlm_finetune_en_5.1.1_3.0_1694567744835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_mlm_finetune_en_5.1.1_3.0_1694567744835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
 +{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_mlm_finetune","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_mlm_finetune", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_mlm_finetune| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.4 MB| + +## References + +https://huggingface.co/stresscaptor/kcbert-mlm-finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kpfbert_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-kpfbert_base_en.md new file mode 100644 index 00000000000000..5f84ee9050008c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kpfbert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kpfbert_base BertEmbeddings from yunaissance +author: John Snow Labs +name: kpfbert_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kpfbert_base` is a English model originally trained by yunaissance. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kpfbert_base_en_5.1.1_3.0_1694584038349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kpfbert_base_en_5.1.1_3.0_1694584038349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kpfbert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("kpfbert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kpfbert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|425.1 MB| + +## References + +https://huggingface.co/yunaissance/kpfbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-labse_english_russian_erzya_v1_ru.md b/docs/_posts/ahmedlone127/2023-09-13-labse_english_russian_erzya_v1_ru.md new file mode 100644 index 00000000000000..f3fb0cffb9aa05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-labse_english_russian_erzya_v1_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian labse_english_russian_erzya_v1 BertEmbeddings from slone +author: John Snow Labs +name: labse_english_russian_erzya_v1 +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `labse_english_russian_erzya_v1` is a Russian model originally trained by slone. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/labse_english_russian_erzya_v1_ru_5.1.1_3.0_1694583966275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/labse_english_russian_erzya_v1_ru_5.1.1_3.0_1694583966275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("labse_english_russian_erzya_v1","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("labse_english_russian_erzya_v1", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|labse_english_russian_erzya_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|535.5 MB| + +## References + +https://huggingface.co/slone/LaBSE-en-ru-myv-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_uncased_finetuned_rramicus_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_uncased_finetuned_rramicus_en.md new file mode 100644 index 00000000000000..29fa712a5d4e73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_uncased_finetuned_rramicus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_bert_base_uncased_finetuned_rramicus BertEmbeddings from repro-rights-amicus-briefs +author: John Snow Labs +name: legal_bert_base_uncased_finetuned_rramicus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bert_base_uncased_finetuned_rramicus` is an English model originally trained by repro-rights-amicus-briefs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567706023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567706023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_bert_base_uncased_finetuned_rramicus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("legal_bert_base_uncased_finetuned_rramicus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bert_base_uncased_finetuned_rramicus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/repro-rights-amicus-briefs/legal-bert-base-uncased-finetuned-RRamicus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legalbert_adept_en.md b/docs/_posts/ahmedlone127/2023-09-13-legalbert_adept_en.md new file mode 100644 index 00000000000000..30fc217e2ac076 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legalbert_adept_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbert_adept BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: legalbert_adept +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbert_adept` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbert_adept_en_5.1.1_3.0_1694567799021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbert_adept_en_5.1.1_3.0_1694567799021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legalbert_adept","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("legalbert_adept", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbert_adept| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hatemestinbejaia/legalbert-adept \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legalnlp_bert_pt.md b/docs/_posts/ahmedlone127/2023-09-13-legalnlp_bert_pt.md new file mode 100644 index 00000000000000..ac59b8243b994d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legalnlp_bert_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese legalnlp_bert BertEmbeddings from felipemaiapolo +author: John Snow Labs +name: legalnlp_bert +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalnlp_bert` is a Portuguese model originally trained by felipemaiapolo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalnlp_bert_pt_5.1.1_3.0_1694580772789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalnlp_bert_pt_5.1.1_3.0_1694580772789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legalnlp_bert","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("legalnlp_bert", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalnlp_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.8 MB| + +## References + +https://huggingface.co/felipemaiapolo/legalnlp-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lesssexistbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-lesssexistbert_en.md new file mode 100644 index 00000000000000..b24b0324e69c1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lesssexistbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lesssexistbert BertEmbeddings from clincolnoz +author: John Snow Labs +name: lesssexistbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lesssexistbert` is an English model originally trained by clincolnoz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lesssexistbert_en_5.1.1_3.0_1694578655498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lesssexistbert_en_5.1.1_3.0_1694578655498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lesssexistbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("lesssexistbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lesssexistbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/clincolnoz/LessSexistBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lsg_bert_base_uncased_4096_en.md b/docs/_posts/ahmedlone127/2023-09-13-lsg_bert_base_uncased_4096_en.md new file mode 100644 index 00000000000000..1645fb87513f11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lsg_bert_base_uncased_4096_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lsg_bert_base_uncased_4096 BertEmbeddings from ccdv +author: John Snow Labs +name: lsg_bert_base_uncased_4096 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lsg_bert_base_uncased_4096` is an English model originally trained by ccdv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lsg_bert_base_uncased_4096_en_5.1.1_3.0_1694564185857.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lsg_bert_base_uncased_4096_en_5.1.1_3.0_1694564185857.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lsg_bert_base_uncased_4096","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("lsg_bert_base_uncased_4096", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lsg_bert_base_uncased_4096| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|417.5 MB| + +## References + +https://huggingface.co/ccdv/lsg-bert-base-uncased-4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mabepa_sts_es.md b/docs/_posts/ahmedlone127/2023-09-13-mabepa_sts_es.md new file mode 100644 index 00000000000000..46fd6e911ad2a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mabepa_sts_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish mabepa_sts BertEmbeddings from Brendar +author: John Snow Labs +name: mabepa_sts +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mabepa_sts` is a Castilian, Spanish model originally trained by Brendar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mabepa_sts_es_5.1.1_3.0_1694584433265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mabepa_sts_es_5.1.1_3.0_1694584433265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mabepa_sts","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("mabepa_sts", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mabepa_sts| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.6 MB| + +## References + +https://huggingface.co/Brendar/MaBePa_STS \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-malay_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-malay_bert_en.md new file mode 100644 index 00000000000000..b8a4aa84b861b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-malay_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English malay_bert BertEmbeddings from NLP4H +author: John Snow Labs +name: malay_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `malay_bert` is an English model originally trained by NLP4H. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malay_bert_en_5.1.1_3.0_1694568227304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malay_bert_en_5.1.1_3.0_1694568227304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("malay_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("malay_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malay_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/NLP4H/ms_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_scratch_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_scratch_mr.md new file mode 100644 index 00000000000000..5de0ded2a4897b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_scratch_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_bert_scratch +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_bert_scratch` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_bert_scratch_mr_5.1.1_3.0_1694579363809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_bert_scratch_mr_5.1.1_3.0_1694579363809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_bert_scratch","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("marathi_bert_scratch", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|470.3 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_hateful_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_hateful_mr.md new file mode 100644 index 00000000000000..0be66bec861ee8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_hateful_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_tweets_bert_hateful BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_tweets_bert_hateful +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_tweets_bert_hateful` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_hateful_mr_5.1.1_3.0_1694578089840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_hateful_mr_5.1.1_3.0_1694578089840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_tweets_bert_hateful","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("marathi_tweets_bert_hateful", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_tweets_bert_hateful| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|890.7 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-tweets-bert-hateful \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_mr.md new file mode 100644 index 00000000000000..018c729848ea80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_tweets_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_tweets_bert +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_tweets_bert` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_mr_5.1.1_3.0_1694576608611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_mr_5.1.1_3.0_1694576608611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_tweets_bert","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // read by BertEmbeddings via setInputCols below + +val embeddings = BertEmbeddings + .pretrained("marathi_tweets_bert", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_tweets_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-tweets-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marbert_adept_en.md b/docs/_posts/ahmedlone127/2023-09-13-marbert_adept_en.md new file mode 100644 index 00000000000000..a944df7dd957a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marbert_adept_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English marbert_adept BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: marbert_adept +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marbert_adept` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marbert_adept_en_5.1.1_3.0_1694570997383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marbert_adept_en_5.1.1_3.0_1694570997383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marbert_adept","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marbert_adept", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marbert_adept| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.7 MB| + +## References + +https://huggingface.co/hatemestinbejaia/MARBERT-adept \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marbert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-marbert_ar.md new file mode 100644 index 00000000000000..1fd0a24bafa01c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marbert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic marbert BertEmbeddings from UBC-NLP +author: John Snow Labs +name: marbert +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marbert` is an Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marbert_ar_5.1.1_3.0_1694573980919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marbert_ar_5.1.1_3.0_1694573980919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marbert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marbert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|608.7 MB| + +## References + +https://huggingface.co/UBC-NLP/MARBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mathbert_custom_en.md b/docs/_posts/ahmedlone127/2023-09-13-mathbert_custom_en.md new file mode 100644 index 00000000000000..54ba238f87fdf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mathbert_custom_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mathbert_custom BertEmbeddings from tbs17 +author: John Snow Labs +name: mathbert_custom +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mathbert_custom` is an English model originally trained by tbs17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mathbert_custom_en_5.1.1_3.0_1694576775346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mathbert_custom_en_5.1.1_3.0_1694576775346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mathbert_custom","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mathbert_custom", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mathbert_custom| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/tbs17/MathBERT-custom \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbert_finetuned_pytorch_en.md b/docs/_posts/ahmedlone127/2023-09-13-mbert_finetuned_pytorch_en.md new file mode 100644 index 00000000000000..063c04f6f55d97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbert_finetuned_pytorch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_finetuned_pytorch BertEmbeddings from fimu-docproc-research +author: John Snow Labs +name: mbert_finetuned_pytorch +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_finetuned_pytorch` is an English model originally trained by fimu-docproc-research. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_finetuned_pytorch_en_5.1.1_3.0_1694583639590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_finetuned_pytorch_en_5.1.1_3.0_1694583639590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_finetuned_pytorch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_finetuned_pytorch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_finetuned_pytorch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/fimu-docproc-research/mbert-finetuned-pytorch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbertu_arabic_en.md b/docs/_posts/ahmedlone127/2023-09-13-mbertu_arabic_en.md new file mode 100644 index 00000000000000..34acc71ebced77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbertu_arabic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbertu_arabic BertEmbeddings from Zappandy +author: John Snow Labs +name: mbertu_arabic +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbertu_arabic` is an English model originally trained by Zappandy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbertu_arabic_en_5.1.1_3.0_1694576350158.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbertu_arabic_en_5.1.1_3.0_1694576350158.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbertu_arabic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbertu_arabic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbertu_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/Zappandy/mBERTu-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-minialbert_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-minialbert_128_en.md new file mode 100644 index 00000000000000..9947e5777491b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-minialbert_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minialbert_128 BertEmbeddings from nlpie +author: John Snow Labs +name: minialbert_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minialbert_128` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minialbert_128_en_5.1.1_3.0_1694567917349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minialbert_128_en_5.1.1_3.0_1694567917349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("minialbert_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minialbert_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minialbert_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.0 MB| + +## References + +https://huggingface.co/nlpie/miniALBERT-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_gh_issues_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_gh_issues_en.md new file mode 100644 index 00000000000000..a9ed8a4ce17be7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_gh_issues_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_gh_issues BertEmbeddings from ericntay +author: John Snow Labs +name: mlm_gh_issues +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_gh_issues` is an English model originally trained by ericntay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_gh_issues_en_5.1.1_3.0_1694577513579.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_gh_issues_en_5.1.1_3.0_1694577513579.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_gh_issues","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_gh_issues", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_gh_issues| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ericntay/mlm_gh_issues \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-model_65000_20ep_en.md b/docs/_posts/ahmedlone127/2023-09-13-model_65000_20ep_en.md new file mode 100644 index 00000000000000..1ab4e5fa53572f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-model_65000_20ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_65000_20ep BertEmbeddings from sergiyvl +author: John Snow Labs +name: model_65000_20ep +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_65000_20ep` is an English model originally trained by sergiyvl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_65000_20ep_en_5.1.1_3.0_1694573722433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_65000_20ep_en_5.1.1_3.0_1694573722433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model_65000_20ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_65000_20ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_65000_20ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/sergiyvl/model_65000_20ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-model_ankai_en.md b/docs/_posts/ahmedlone127/2023-09-13-model_ankai_en.md new file mode 100644 index 00000000000000..0474e5a33c124b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-model_ankai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_ankai BertEmbeddings from wudi7758521521 +author: John Snow Labs +name: model_ankai +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_ankai` is an English model originally trained by wudi7758521521. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_ankai_en_5.1.1_3.0_1694586066096.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_ankai_en_5.1.1_3.0_1694586066096.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model_ankai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_ankai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_ankai| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wudi7758521521/model_ankai \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-model_bangla_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-model_bangla_bert_en.md new file mode 100644 index 00000000000000..29a3dc0f14d069 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-model_bangla_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_bangla_bert BertEmbeddings from Kowsher +author: John Snow Labs +name: model_bangla_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_bangla_bert` is an English model originally trained by Kowsher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_bangla_bert_en_5.1.1_3.0_1694565028072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_bangla_bert_en_5.1.1_3.0_1694565028072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model_bangla_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_bangla_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_bangla_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|612.1 MB| + +## References + +https://huggingface.co/Kowsher/model-bangla-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-moresexistbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-moresexistbert_en.md new file mode 100644 index 00000000000000..15e51bcc31404a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-moresexistbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English moresexistbert BertEmbeddings from clincolnoz +author: John Snow Labs +name: moresexistbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `moresexistbert` is an English model originally trained by clincolnoz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/moresexistbert_en_5.1.1_3.0_1694577057922.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/moresexistbert_en_5.1.1_3.0_1694577057922.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("moresexistbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("moresexistbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|moresexistbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|404.7 MB| + +## References + +https://huggingface.co/clincolnoz/MoreSexistBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-muril_with_mlm_cased_temp_en.md b/docs/_posts/ahmedlone127/2023-09-13-muril_with_mlm_cased_temp_en.md new file mode 100644 index 00000000000000..bed1b3b903c55e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-muril_with_mlm_cased_temp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English muril_with_mlm_cased_temp BertEmbeddings from simran-kh +author: John Snow Labs +name: muril_with_mlm_cased_temp +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `muril_with_mlm_cased_temp` is an English model originally trained by simran-kh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/muril_with_mlm_cased_temp_en_5.1.1_3.0_1694574360108.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/muril_with_mlm_cased_temp_en_5.1.1_3.0_1694574360108.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("muril_with_mlm_cased_temp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("muril_with_mlm_cased_temp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|muril_with_mlm_cased_temp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|890.4 MB| + +## References + +https://huggingface.co/simran-kh/muril-with-mlm-cased-temp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mwp_bert_english_en.md b/docs/_posts/ahmedlone127/2023-09-13-mwp_bert_english_en.md new file mode 100644 index 00000000000000..a773bb355fd881 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mwp_bert_english_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mwp_bert_english BertEmbeddings from invokerliang +author: John Snow Labs +name: mwp_bert_english +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mwp_bert_english` is an English model originally trained by invokerliang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mwp_bert_english_en_5.1.1_3.0_1694575296759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mwp_bert_english_en_5.1.1_3.0_1694575296759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mwp_bert_english","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mwp_bert_english", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mwp_bert_english| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/invokerliang/MWP-BERT-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel005_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel005_en.md new file mode 100644 index 00000000000000..bb998c2ce0980e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel005_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel005 BertEmbeddings from wbmitcast +author: John Snow Labs +name: mymodel005 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mymodel005` is an English model originally trained by wbmitcast. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel005_en_5.1.1_3.0_1694583366404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel005_en_5.1.1_3.0_1694583366404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mymodel005","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel005", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel005| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wbmitcast/mymodel005 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel007_wbmitcast_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel007_wbmitcast_en.md new file mode 100644 index 00000000000000..181bd8341e32df --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel007_wbmitcast_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel007_wbmitcast BertEmbeddings from wbmitcast +author: John Snow Labs +name: mymodel007_wbmitcast +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mymodel007_wbmitcast` is an English model originally trained by wbmitcast. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel007_wbmitcast_en_5.1.1_3.0_1694583570220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel007_wbmitcast_en_5.1.1_3.0_1694583570220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mymodel007_wbmitcast","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel007_wbmitcast", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel007_wbmitcast| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wbmitcast/mymodel007 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel1007_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel1007_en.md new file mode 100644 index 00000000000000..22775a1a5aa98b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel1007_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel1007 BertEmbeddings from Wilson2021 +author: John Snow Labs +name: mymodel1007 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mymodel1007` is an English model originally trained by Wilson2021. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel1007_en_5.1.1_3.0_1694574662440.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel1007_en_5.1.1_3.0_1694574662440.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mymodel1007","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel1007", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel1007| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Wilson2021/mymodel1007 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nepalibert_en.md b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_en.md new file mode 100644 index 00000000000000..df8acc60b5f81c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nepalibert BertEmbeddings from Rajan +author: John Snow Labs +name: nepalibert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nepalibert` is an English model originally trained by Rajan. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepalibert_en_5.1.1_3.0_1694569792967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepalibert_en_5.1.1_3.0_1694569792967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("nepalibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepalibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepalibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|305.4 MB| + +## References + +https://huggingface.co/Rajan/NepaliBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nepalibert_ne.md b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_ne.md new file mode 100644 index 00000000000000..704887a504fc5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_ne.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Nepali (macrolanguage) nepalibert BertEmbeddings from Shushant +author: John Snow Labs +name: nepalibert +date: 2023-09-13 +tags: [bert, ne, open_source, fill_mask, onnx] +task: Embeddings +language: ne +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nepalibert` is a Nepali (macrolanguage) model originally trained by Shushant. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepalibert_ne_5.1.1_3.0_1694571473601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepalibert_ne_5.1.1_3.0_1694571473601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("nepalibert","ne") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepalibert", "ne") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepalibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ne| +|Size:|408.5 MB| + +## References + +https://huggingface.co/Shushant/nepaliBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-neuba_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-neuba_bert_en.md new file mode 100644 index 00000000000000..b2a00258eba302 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-neuba_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English neuba_bert BertEmbeddings from thunlp +author: John Snow Labs +name: neuba_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `neuba_bert` is an English model originally trained by thunlp. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/neuba_bert_en_5.1.1_3.0_1694577955856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/neuba_bert_en_5.1.1_3.0_1694577955856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("neuba_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("neuba_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|neuba_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/thunlp/neuba-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nl1_en.md b/docs/_posts/ahmedlone127/2023-09-13-nl1_en.md new file mode 100644 index 00000000000000..7c685451ff7262 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nl1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nl1 BertEmbeddings from willemjan +author: John Snow Labs +name: nl1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nl1` is an English model originally trained by willemjan. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nl1_en_5.1.1_3.0_1694584898456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nl1_en_5.1.1_3.0_1694584898456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("nl1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nl1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nl1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/willemjan/nl1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-notram_bert_norwegian_cased_080321_no.md b/docs/_posts/ahmedlone127/2023-09-13-notram_bert_norwegian_cased_080321_no.md new file mode 100644 index 00000000000000..b7ede65d1edf53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-notram_bert_norwegian_cased_080321_no.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Norwegian notram_bert_norwegian_cased_080321 BertEmbeddings from NbAiLab +author: John Snow Labs +name: notram_bert_norwegian_cased_080321 +date: 2023-09-13 +tags: [bert, "no", open_source, fill_mask, onnx] +task: Embeddings +language: "no" +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `notram_bert_norwegian_cased_080321` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/notram_bert_norwegian_cased_080321_no_5.1.1_3.0_1694569128176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/notram_bert_norwegian_cased_080321_no_5.1.1_3.0_1694569128176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("notram_bert_norwegian_cased_080321","no") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("notram_bert_norwegian_cased_080321", "no") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|notram_bert_norwegian_cased_080321| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|no| +|Size:|663.0 MB| + +## References + +https://huggingface.co/NbAiLab/notram-bert-norwegian-cased-080321 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalbert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_cased_en.md new file mode 100644 index 00000000000000..63d40cbc3aaeaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalbert_cased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalbert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalbert_cased` is an English model originally trained by opticalmaterials. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalbert_cased_en_5.1.1_3.0_1694581051911.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalbert_cased_en_5.1.1_3.0_1694581051911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("opticalbert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalbert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalbert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalbert_cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalbert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_uncased_en.md new file mode 100644 index 00000000000000..89048fd4589948 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalbert_uncased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalbert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalbert_uncased` is an English model originally trained by opticalmaterials. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalbert_uncased_en_5.1.1_3.0_1694581416416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalbert_uncased_en_5.1.1_3.0_1694581416416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("opticalbert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalbert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalbert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalbert_uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_cased_en.md new file mode 100644 index 00000000000000..a6bf328fba73bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalpurebert_cased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalpurebert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalpurebert_cased` is an English model originally trained by opticalmaterials. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalpurebert_cased_en_5.1.1_3.0_1694581559636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalpurebert_cased_en_5.1.1_3.0_1694581559636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("opticalpurebert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalpurebert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalpurebert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalpurebert_cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_uncased_en.md new file mode 100644 index 00000000000000..9f396c3edfa399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalpurebert_uncased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalpurebert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalpurebert_uncased` is an English model originally trained by opticalmaterials. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalpurebert_uncased_en_5.1.1_3.0_1694581713185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalpurebert_uncased_en_5.1.1_3.0_1694581713185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("opticalpurebert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalpurebert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalpurebert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalpurebert_uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-output_miladfa7_en.md b/docs/_posts/ahmedlone127/2023-09-13-output_miladfa7_en.md new file mode 100644 index 00000000000000..7ebbc4d5b7b831 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-output_miladfa7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English output_miladfa7 BertEmbeddings from miladfa7 +author: John Snow Labs +name: output_miladfa7 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `output_miladfa7` is an English model originally trained by miladfa7. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/output_miladfa7_en_5.1.1_3.0_1694569708274.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/output_miladfa7_en_5.1.1_3.0_1694569708274.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("output_miladfa7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("output_miladfa7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|output_miladfa7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.0 MB| + +## References + +https://huggingface.co/miladfa7/output \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-paraphraserplus_1epoch_en.md b/docs/_posts/ahmedlone127/2023-09-13-paraphraserplus_1epoch_en.md new file mode 100644 index 00000000000000..20335bd56f296d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-paraphraserplus_1epoch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English paraphraserplus_1epoch BertEmbeddings from sergiyvl +author: John Snow Labs +name: paraphraserplus_1epoch +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `paraphraserplus_1epoch` is an English model originally trained by sergiyvl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paraphraserplus_1epoch_en_5.1.1_3.0_1694573041191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paraphraserplus_1epoch_en_5.1.1_3.0_1694573041191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("paraphraserplus_1epoch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("paraphraserplus_1epoch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|paraphraserplus_1epoch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.4 MB| + +## References + +https://huggingface.co/sergiyvl/ParaPhraserPlus_1epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-parlamint_en.md b/docs/_posts/ahmedlone127/2023-09-13-parlamint_en.md new file mode 100644 index 00000000000000..65579e5335403e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-parlamint_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English parlamint BertEmbeddings from IneG +author: John Snow Labs +name: parlamint +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `parlamint` is an English model originally trained by IneG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/parlamint_en_5.1.1_3.0_1694584898291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/parlamint_en_5.1.1_3.0_1694584898291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("parlamint","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("parlamint", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|parlamint| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/IneG/parlamint \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-parsbert_base_sanay_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-parsbert_base_sanay_uncased_en.md new file mode 100644 index 00000000000000..ebf072adbafbf1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-parsbert_base_sanay_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English parsbert_base_sanay_uncased BertEmbeddings from miladfa7 +author: John Snow Labs +name: parsbert_base_sanay_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `parsbert_base_sanay_uncased` is an English model originally trained by miladfa7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/parsbert_base_sanay_uncased_en_5.1.1_3.0_1694569942308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/parsbert_base_sanay_uncased_en_5.1.1_3.0_1694569942308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("parsbert_base_sanay_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("parsbert_base_sanay_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|parsbert_base_sanay_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|608.9 MB| + +## References + +https://huggingface.co/miladfa7/parsbert-base-sanay-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pcscibert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-pcscibert_cased_en.md new file mode 100644 index 00000000000000..607352ec2d16c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pcscibert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pcscibert_cased BertEmbeddings from jmzk96 +author: John Snow Labs +name: pcscibert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pcscibert_cased` is an English model originally trained by jmzk96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pcscibert_cased_en_5.1.1_3.0_1694586987079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pcscibert_cased_en_5.1.1_3.0_1694586987079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pcscibert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pcscibert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pcscibert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/jmzk96/PCSciBERT_cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_banking77_en.md b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_banking77_en.md new file mode 100644 index 00000000000000..0bf59c249e99db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_banking77_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English protaugment_lm_banking77 BertEmbeddings from tdopierre +author: John Snow Labs +name: protaugment_lm_banking77 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `protaugment_lm_banking77` is an English model originally trained by tdopierre. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/protaugment_lm_banking77_en_5.1.1_3.0_1694576921524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/protaugment_lm_banking77_en_5.1.1_3.0_1694576921524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("protaugment_lm_banking77","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("protaugment_lm_banking77", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|protaugment_lm_banking77| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/tdopierre/ProtAugment-LM-BANKING77 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_en.md new file mode 100644 index 00000000000000..1dfebc410806ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pubmedbert_abstract_cord19 BertEmbeddings from pritamdeka +author: John Snow Labs +name: pubmedbert_abstract_cord19 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pubmedbert_abstract_cord19` is an English model originally trained by pritamdeka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_en_5.1.1_3.0_1694564002029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_en_5.1.1_3.0_1694564002029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pubmedbert_abstract_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pubmedbert_abstract_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_abstract_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/pritamdeka/PubMedBert-abstract-cord19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_v2_en.md b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_v2_en.md new file mode 100644 index 00000000000000..a5c4d15b137480 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pubmedbert_abstract_cord19_v2 BertEmbeddings from pritamdeka +author: John Snow Labs +name: pubmedbert_abstract_cord19_v2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pubmedbert_abstract_cord19_v2` is an English model originally trained by pritamdeka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_v2_en_5.1.1_3.0_1694563849710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_v2_en_5.1.1_3.0_1694563849710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pubmedbert_abstract_cord19_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pubmedbert_abstract_cord19_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_abstract_cord19_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/pritamdeka/PubMedBert-abstract-cord19-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_fulltext_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_fulltext_cord19_en.md new file mode 100644 index 00000000000000..3b7bc95189a5b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_fulltext_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pubmedbert_fulltext_cord19 BertEmbeddings from pritamdeka +author: John Snow Labs +name: pubmedbert_fulltext_cord19 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pubmedbert_fulltext_cord19` is an English model originally trained by pritamdeka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_fulltext_cord19_en_5.1.1_3.0_1694564166623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_fulltext_cord19_en_5.1.1_3.0_1694564166623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pubmedbert_fulltext_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pubmedbert_fulltext_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_fulltext_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/pritamdeka/PubMedBert-fulltext-cord19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qaideptmodel_en.md b/docs/_posts/ahmedlone127/2023-09-13-qaideptmodel_en.md new file mode 100644 index 00000000000000..91ba2dea30bd77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qaideptmodel_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English qaideptmodel BertEmbeddings from Razan +author: John Snow Labs +name: qaideptmodel +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qaideptmodel` is an English model originally trained by Razan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qaideptmodel_en_5.1.1_3.0_1694569996396.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qaideptmodel_en_5.1.1_3.0_1694569996396.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("qaideptmodel","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qaideptmodel", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qaideptmodel| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.8 MB| + +## References + +https://huggingface.co/Razan/QAIDeptModel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qe3_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qe3_ar.md new file mode 100644 index 00000000000000..7fbf063d360c7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qe3_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qe3 BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qe3 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qe3` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qe3_ar_5.1.1_3.0_1694580295901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qe3_ar_5.1.1_3.0_1694580295901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("qe3","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qe3", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qe3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.1 MB| + +## References + +https://huggingface.co/NLP-EXP/QE3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qe6_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qe6_ar.md new file mode 100644 index 00000000000000..b92c874a716461 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qe6_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qe6 BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qe6 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qe6` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qe6_ar_5.1.1_3.0_1694580457879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qe6_ar_5.1.1_3.0_1694580457879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("qe6","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qe6", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qe6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.2 MB| + +## References + +https://huggingface.co/NLP-EXP/QE6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qse_en.md b/docs/_posts/ahmedlone127/2023-09-13-qse_en.md new file mode 100644 index 00000000000000..ac1c35f3a24fce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qse_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English qse BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qse +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qse` is an English model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qse_en_5.1.1_3.0_1694582574251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qse_en_5.1.1_3.0_1694582574251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("qse","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qse", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qse| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.0 MB| + +## References + +https://huggingface.co/NLP-EXP/QSE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qsr_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qsr_ar.md new file mode 100644 index 00000000000000..18104a5f339a1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qsr_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qsr BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qsr +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qsr` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qsr_ar_5.1.1_3.0_1694580143110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qsr_ar_5.1.1_3.0_1694580143110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("qsr","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qsr", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qsr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.0 MB| + +## References + +https://huggingface.co/NLP-EXP/QSR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qsrt_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qsrt_ar.md new file mode 100644 index 00000000000000..80bf5b476a74de --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qsrt_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qsrt BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qsrt +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qsrt` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qsrt_ar_5.1.1_3.0_1694579804623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qsrt_ar_5.1.1_3.0_1694579804623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("qsrt","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qsrt", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qsrt| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.2 MB| + +## References + +https://huggingface.co/NLP-EXP/QSRT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qst_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qst_ar.md new file mode 100644 index 00000000000000..34eb8c95c873e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qst_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qst BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qst +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qst` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qst_ar_5.1.1_3.0_1694579959539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qst_ar_5.1.1_3.0_1694579959539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("qst","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qst", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qst| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.0 MB| + +## References + +https://huggingface.co/NLP-EXP/QST \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-recipe_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-recipe_bert_base_uncased_en.md new file mode 100644 index 00000000000000..e3eae7db825d7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-recipe_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_bert_base_uncased BertEmbeddings from AnonymousSub +author: John Snow Labs +name: recipe_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_bert_base_uncased` is an English model originally trained by AnonymousSub. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_bert_base_uncased_en_5.1.1_3.0_1694577388934.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_bert_base_uncased_en_5.1.1_3.0_1694577388934.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("recipe_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("recipe_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/AnonymousSub/recipe-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-retbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-retbert_en.md new file mode 100644 index 00000000000000..a04449fd7fe3b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-retbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English retbert BertEmbeddings from ThePixOne +author: John Snow Labs +name: retbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `retbert` is an English model originally trained by ThePixOne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/retbert_en_5.1.1_3.0_1694572305505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/retbert_en_5.1.1_3.0_1694572305505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("retbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("retbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|retbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ThePixOne/retBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_base_ru.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_ru.md new file mode 100644 index 00000000000000..890f7c7e49be59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian rubert_base BertEmbeddings from ai-forever +author: John Snow Labs +name: rubert_base +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rubert_base` is a Russian model originally trained by ai-forever. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_base_ru_5.1.1_3.0_1694571215568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_base_ru_5.1.1_3.0_1694571215568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("rubert_base","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_base", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|667.0 MB| + +## References + +https://huggingface.co/ai-forever/ruBert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_large_ru.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_large_ru.md new file mode 100644 index 00000000000000..4215dc9aff15ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_large_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian rubert_large BertEmbeddings from ai-forever +author: John Snow Labs +name: rubert_large +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rubert_large` is a Russian model originally trained by ai-forever. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_large_ru_5.1.1_3.0_1694571586736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_large_ru_5.1.1_3.0_1694571586736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("rubert_large","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_large", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|1.6 GB| + +## References + +https://huggingface.co/ai-forever/ruBert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubiobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-rubiobert_en.md new file mode 100644 index 00000000000000..709b81962d538b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubiobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English rubiobert BertEmbeddings from alexyalunin +author: John Snow Labs +name: rubiobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubiobert` is an English model originally trained by alexyalunin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubiobert_en_5.1.1_3.0_1694578324460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubiobert_en_5.1.1_3.0_1694578324460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("rubiobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubiobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubiobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|667.1 MB| + +## References + +https://huggingface.co/alexyalunin/RuBioBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v1_finetuned_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v1_finetuned_20_en.md new file mode 100644 index 00000000000000..c5e0519e83ef15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v1_finetuned_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_lm_v1_finetuned_20 BertEmbeddings from ariesutiono +author: John Snow Labs +name: scibert_lm_v1_finetuned_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_lm_v1_finetuned_20` is an English model originally trained by ariesutiono. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_lm_v1_finetuned_20_en_5.1.1_3.0_1694564841995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_lm_v1_finetuned_20_en_5.1.1_3.0_1694564841995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("scibert_lm_v1_finetuned_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_lm_v1_finetuned_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_lm_v1_finetuned_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|499.5 MB| + +## References + +https://huggingface.co/ariesutiono/scibert-lm-v1-finetuned-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v2_finetuned_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v2_finetuned_20_en.md new file mode 100644 index 00000000000000..ee7b79a92a3f4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v2_finetuned_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_lm_v2_finetuned_20 BertEmbeddings from ariesutiono +author: John Snow Labs +name: scibert_lm_v2_finetuned_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_lm_v2_finetuned_20` is an English model originally trained by ariesutiono. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_lm_v2_finetuned_20_en_5.1.1_3.0_1694565081199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_lm_v2_finetuned_20_en_5.1.1_3.0_1694565081199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("scibert_lm_v2_finetuned_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_lm_v2_finetuned_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_lm_v2_finetuned_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|499.5 MB| + +## References + +https://huggingface.co/ariesutiono/scibert-lm-v2-finetuned-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibero_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibero_en.md new file mode 100644 index 00000000000000..777c51d6e921d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibero_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_finetuned_scibero BertEmbeddings from Transabrar +author: John Snow Labs +name: scibert_scivocab_uncased_finetuned_scibero +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_uncased_finetuned_scibero` is an English model originally trained by Transabrar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibero_en_5.1.1_3.0_1694572669211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibero_en_5.1.1_3.0_1694572669211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("scibert_scivocab_uncased_finetuned_scibero","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_finetuned_scibero", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_finetuned_scibero| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/Transabrar/scibert_scivocab_uncased-finetuned-scibero \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en.md new file mode 100644 index 00000000000000..4170c091360902 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_finetuned_scibert_agu_abstracts BertEmbeddings from arminmehrabian +author: John Snow Labs +name: scibert_scivocab_uncased_finetuned_scibert_agu_abstracts +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_uncased_finetuned_scibert_agu_abstracts` is an English model originally trained by arminmehrabian.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en_5.1.1_3.0_1694563751306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en_5.1.1_3.0_1694563751306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("scibert_scivocab_uncased_finetuned_scibert_agu_abstracts","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_finetuned_scibert_agu_abstracts", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_finetuned_scibert_agu_abstracts| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/arminmehrabian/scibert_scivocab_uncased-finetuned-scibert-agu-abstracts \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_long_4096_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_long_4096_en.md new file mode 100644 index 00000000000000..56b07713c1bea8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_long_4096_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_long_4096 BertEmbeddings from yorko +author: John Snow Labs +name: scibert_scivocab_uncased_long_4096 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_uncased_long_4096` is an English model originally trained by yorko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_long_4096_en_5.1.1_3.0_1694587393144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_long_4096_en_5.1.1_3.0_1694587393144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_uncased_long_4096","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_long_4096", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_long_4096| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.3 MB| + +## References + +https://huggingface.co/yorko/scibert_scivocab_uncased_long_4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_wechsel_korean_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_wechsel_korean_en.md new file mode 100644 index 00000000000000..3ee8667dc06922 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_wechsel_korean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_wechsel_korean BertEmbeddings from LeverageX +author: John Snow Labs +name: scibert_wechsel_korean +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_wechsel_korean` is an English model originally trained by LeverageX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_wechsel_korean_en_5.1.1_3.0_1694566166641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_wechsel_korean_en_5.1.1_3.0_1694566166641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_wechsel_korean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_wechsel_korean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_wechsel_korean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/LeverageX/scibert-wechsel-korean \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-segabert_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-segabert_large_en.md new file mode 100644 index 00000000000000..d47852fa81c05c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-segabert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English segabert_large BertEmbeddings from rsvp-AI-ca +author: John Snow Labs +name: segabert_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `segabert_large` is an English model originally trained by rsvp-AI-ca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/segabert_large_en_5.1.1_3.0_1694568994445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/segabert_large_en_5.1.1_3.0_1694568994445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("segabert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("segabert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|segabert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|941.1 MB| + +## References + +https://huggingface.co/rsvp-AI-ca/segabert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sentmae_beir_en.md b/docs/_posts/ahmedlone127/2023-09-13-sentmae_beir_en.md new file mode 100644 index 00000000000000..b797ac70350b25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sentmae_beir_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sentmae_beir BertEmbeddings from SamuelYang +author: John Snow Labs +name: sentmae_beir +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentmae_beir` is an English model originally trained by SamuelYang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentmae_beir_en_5.1.1_3.0_1694567238490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentmae_beir_en_5.1.1_3.0_1694567238490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sentmae_beir","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sentmae_beir", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentmae_beir| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/SamuelYang/SentMAE_BEIR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sentmae_en.md b/docs/_posts/ahmedlone127/2023-09-13-sentmae_en.md new file mode 100644 index 00000000000000..c71ac5be26922e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sentmae_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sentmae BertEmbeddings from SamuelYang +author: John Snow Labs +name: sentmae +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentmae` is an English model originally trained by SamuelYang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentmae_en_5.1.1_3.0_1694567054783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentmae_en_5.1.1_3.0_1694567054783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sentmae","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sentmae", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentmae| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/SamuelYang/SentMAE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_en.md new file mode 100644 index 00000000000000..a26ae4ff2a2120 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sentmae_msmarco BertEmbeddings from SamuelYang +author: John Snow Labs +name: sentmae_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentmae_msmarco` is an English model originally trained by SamuelYang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentmae_msmarco_en_5.1.1_3.0_1694567599891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentmae_msmarco_en_5.1.1_3.0_1694567599891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sentmae_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sentmae_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentmae_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/SamuelYang/SentMAE_MSMARCO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sikubert_zh.md b/docs/_posts/ahmedlone127/2023-09-13-sikubert_zh.md new file mode 100644 index 00000000000000..1f8bbc0e6e1c9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sikubert_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese sikubert BertEmbeddings from SIKU-BERT +author: John Snow Labs +name: sikubert +date: 2023-09-13 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sikubert` is a Chinese model originally trained by SIKU-BERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sikubert_zh_5.1.1_3.0_1694570261032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sikubert_zh_5.1.1_3.0_1694570261032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sikubert","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sikubert", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sikubert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|406.0 MB| + +## References + +https://huggingface.co/SIKU-BERT/sikubert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-simlm_base_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_msmarco_en.md new file mode 100644 index 00000000000000..bd9a20c9fe4cd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English simlm_base_msmarco BertEmbeddings from intfloat +author: John Snow Labs +name: simlm_base_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `simlm_base_msmarco` is an English model originally trained by intfloat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/simlm_base_msmarco_en_5.1.1_3.0_1694566597314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/simlm_base_msmarco_en_5.1.1_3.0_1694566597314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("simlm_base_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("simlm_base_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|simlm_base_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/intfloat/simlm-base-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-simlm_base_wiki100w_en.md b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_wiki100w_en.md new file mode 100644 index 00000000000000..5d653265f03a17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_wiki100w_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English simlm_base_wiki100w BertEmbeddings from intfloat +author: John Snow Labs +name: simlm_base_wiki100w +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `simlm_base_wiki100w` is an English model originally trained by intfloat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/simlm_base_wiki100w_en_5.1.1_3.0_1694566769454.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/simlm_base_wiki100w_en_5.1.1_3.0_1694566769454.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("simlm_base_wiki100w","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("simlm_base_wiki100w", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|simlm_base_wiki100w| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/intfloat/simlm-base-wiki100w \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-slim_beir_scifact_old_en.md b/docs/_posts/ahmedlone127/2023-09-13-slim_beir_scifact_old_en.md new file mode 100644 index 00000000000000..994317be139b8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-slim_beir_scifact_old_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English slim_beir_scifact_old BertEmbeddings from kinoo +author: John Snow Labs +name: slim_beir_scifact_old +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `slim_beir_scifact_old` is an English model originally trained by kinoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/slim_beir_scifact_old_en_5.1.1_3.0_1694578412848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/slim_beir_scifact_old_en_5.1.1_3.0_1694578412848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("slim_beir_scifact_old","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("slim_beir_scifact_old", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|slim_beir_scifact_old| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/kinoo/slim_beir-scifact-old \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-slimr_msmarco_passage_en.md b/docs/_posts/ahmedlone127/2023-09-13-slimr_msmarco_passage_en.md new file mode 100644 index 00000000000000..d8f59ad8aa8f32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-slimr_msmarco_passage_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English slimr_msmarco_passage BertEmbeddings from castorini +author: John Snow Labs +name: slimr_msmarco_passage +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `slimr_msmarco_passage` is an English model originally trained by castorini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/slimr_msmarco_passage_en_5.1.1_3.0_1694577246538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/slimr_msmarco_passage_en_5.1.1_3.0_1694577246538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("slimr_msmarco_passage","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("slimr_msmarco_passage", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|slimr_msmarco_passage| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/castorini/slimr-msmarco-passage \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..9f23d6657fee5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563331949.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563331949.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|135.2 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..0c2d846bfce26d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563524788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563524788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|122.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qqp-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..69cca9ff8c53e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563639389.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563639389.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_rte_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|126.2 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..2bd82070b0129b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563734576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563734576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|126.3 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..5a93507c7457ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563829652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563829652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_sst2_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..826dafefb44cac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563918718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563918718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..11d672c756d5fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564136292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564136292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_stsb_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|117.4 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..6eac2463e75411 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564017601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564017601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|117.4 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..18c0b2d5019597 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564212872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564212872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|108.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-wnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_base_en.md new file mode 100644 index 00000000000000..dc85b7d7c9991e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spbert_mlm_base BertEmbeddings from razent +author: John Snow Labs +name: spbert_mlm_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spbert_mlm_base` is an English model originally trained by razent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spbert_mlm_base_en_5.1.1_3.0_1694566338968.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spbert_mlm_base_en_5.1.1_3.0_1694566338968.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("spbert_mlm_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spbert_mlm_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spbert_mlm_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/razent/spbert-mlm-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_wso_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_wso_base_en.md new file mode 100644 index 00000000000000..36b6d2733147e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_wso_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spbert_mlm_wso_base BertEmbeddings from razent +author: John Snow Labs +name: spbert_mlm_wso_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spbert_mlm_wso_base` is an English model originally trained by razent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spbert_mlm_wso_base_en_5.1.1_3.0_1694566556828.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spbert_mlm_wso_base_en_5.1.1_3.0_1694566556828.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("spbert_mlm_wso_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spbert_mlm_wso_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spbert_mlm_wso_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/razent/spbert-mlm-wso-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_cocondenser_selfdistil_baseplate_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_cocondenser_selfdistil_baseplate_en.md new file mode 100644 index 00000000000000..a6d5e2441fc700 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_cocondenser_selfdistil_baseplate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_cocondenser_selfdistil_baseplate BertEmbeddings from baseplate +author: John Snow Labs +name: splade_cocondenser_selfdistil_baseplate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_cocondenser_selfdistil_baseplate` is an English model originally trained by baseplate. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_cocondenser_selfdistil_baseplate_en_5.1.1_3.0_1694585076462.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_cocondenser_selfdistil_baseplate_en_5.1.1_3.0_1694585076462.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("splade_cocondenser_selfdistil_baseplate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_cocondenser_selfdistil_baseplate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_cocondenser_selfdistil_baseplate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/baseplate/splade-cocondenser-selfdistil \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sw_v1_sw.md b/docs/_posts/ahmedlone127/2023-09-13-sw_v1_sw.md new file mode 100644 index 00000000000000..1b5b9b390cacf3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sw_v1_sw.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swahili (macrolanguage) sw_v1 BertEmbeddings from eolang +author: John Snow Labs +name: sw_v1 +date: 2023-09-13 +tags: [bert, sw, open_source, fill_mask, onnx] +task: Embeddings +language: sw +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sw_v1` is a Swahili (macrolanguage) model originally trained by eolang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sw_v1_sw_5.1.1_3.0_1694578806650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sw_v1_sw_5.1.1_3.0_1694578806650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("sw_v1","sw") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sw_v1", "sw") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sw_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sw| +|Size:|408.0 MB| + +## References + +https://huggingface.co/eolang/SW-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-test_telsayed_en.md b/docs/_posts/ahmedlone127/2023-09-13-test_telsayed_en.md new file mode 100644 index 00000000000000..a4d1b32d58d99d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-test_telsayed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_telsayed BertEmbeddings from telsayed +author: John Snow Labs +name: test_telsayed +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_telsayed` is an English model originally trained by telsayed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_telsayed_en_5.1.1_3.0_1694576902353.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_telsayed_en_5.1.1_3.0_1694576902353.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("test_telsayed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_telsayed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_telsayed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/telsayed/test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tiny_a_2_h_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-tiny_a_2_h_2_en.md new file mode 100644 index 00000000000000..e9217ac1e45314 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tiny_a_2_h_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_a_2_h_2 BertEmbeddings from KamrusSamad +author: John Snow Labs +name: tiny_a_2_h_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_a_2_h_2` is an English model originally trained by KamrusSamad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_a_2_h_2_en_5.1.1_3.0_1694564331760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_a_2_h_2_en_5.1.1_3.0_1694564331760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_a_2_h_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_a_2_h_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_a_2_h_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/KamrusSamad/tiny_A-2_H-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tiny_clinicalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-tiny_clinicalbert_en.md new file mode 100644 index 00000000000000..47f42829007e45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tiny_clinicalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_clinicalbert BertEmbeddings from nlpie +author: John Snow Labs +name: tiny_clinicalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_clinicalbert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_clinicalbert_en_5.1.1_3.0_1694573934228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_clinicalbert_en_5.1.1_3.0_1694573934228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_clinicalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_clinicalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_clinicalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|51.9 MB| + +## References + +https://huggingface.co/nlpie/tiny-clinicalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..4fa075f99a585a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563249016.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563249016.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_medium_scale_en.md new file mode 100644 index 00000000000000..49b628bcf918ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_ag_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_ag_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_ag_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_ag_medium_scale_en_5.1.1_3.0_1694588336137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_ag_medium_scale_en_5.1.1_3.0_1694588336137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tlm_ag_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_ag_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_ag_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/yxchar/tlm-ag-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-topic_erica_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-topic_erica_bert_en.md new file mode 100644 index 00000000000000..5cd4d3f913f1b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-topic_erica_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English topic_erica_bert BertEmbeddings from Kdogs +author: John Snow Labs +name: topic_erica_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `topic_erica_bert` is an English model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_erica_bert_en_5.1.1_3.0_1694565067616.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_erica_bert_en_5.1.1_3.0_1694565067616.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("topic_erica_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("topic_erica_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_erica_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Kdogs/topic_erica_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-transformer_exercise_01_en.md b/docs/_posts/ahmedlone127/2023-09-13-transformer_exercise_01_en.md new file mode 100644 index 00000000000000..45e31f24879095 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-transformer_exercise_01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English transformer_exercise_01 BertEmbeddings from alangganggang +author: John Snow Labs +name: transformer_exercise_01 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `transformer_exercise_01` is an English model originally trained by alangganggang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/transformer_exercise_01_en_5.1.1_3.0_1694577887359.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/transformer_exercise_01_en_5.1.1_3.0_1694577887359.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("transformer_exercise_01","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("transformer_exercise_01", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|transformer_exercise_01| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/alangganggang/transformer_exercise_01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tsonga_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-tsonga_test_en.md new file mode 100644 index 00000000000000..acb9374ea0b290 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tsonga_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tsonga_test BertEmbeddings from taesu +author: John Snow Labs +name: tsonga_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tsonga_test` is an English model originally trained by taesu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tsonga_test_en_5.1.1_3.0_1694576071612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tsonga_test_en_5.1.1_3.0_1694576071612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tsonga_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tsonga_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tsonga_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/taesu/ts-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tulio_bert_es.md b/docs/_posts/ahmedlone127/2023-09-13-tulio_bert_es.md new file mode 100644 index 00000000000000..7d68cf213c0215 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tulio_bert_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish tulio_bert BertEmbeddings from jorgeortizfuentes +author: John Snow Labs +name: tulio_bert +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tulio_bert` is a Castilian, Spanish model originally trained by jorgeortizfuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tulio_bert_es_5.1.1_3.0_1694570756756.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tulio_bert_es_5.1.1_3.0_1694570756756.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tulio_bert","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tulio_bert", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tulio_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.0 MB| + +## References + +https://huggingface.co/jorgeortizfuentes/tulio-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-wineberto_italian_cased_it.md b/docs/_posts/ahmedlone127/2023-09-13-wineberto_italian_cased_it.md new file mode 100644 index 00000000000000..3909a41f8d79b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-wineberto_italian_cased_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian wineberto_italian_cased BertEmbeddings from vinhood +author: John Snow Labs +name: wineberto_italian_cased +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `wineberto_italian_cased` is an Italian model originally trained by vinhood. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wineberto_italian_cased_it_5.1.1_3.0_1694581896429.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wineberto_italian_cased_it_5.1.1_3.0_1694581896429.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("wineberto_italian_cased","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("wineberto_italian_cased", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wineberto_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|412.6 MB| + +## References + +https://huggingface.co/vinhood/wineberto-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-wobert_chinese_plus_zh.md b/docs/_posts/ahmedlone127/2023-09-13-wobert_chinese_plus_zh.md new file mode 100644 index 00000000000000..1959e2400b1f9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-wobert_chinese_plus_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese wobert_chinese_plus BertEmbeddings from qinluo +author: John Snow Labs +name: wobert_chinese_plus +date: 2023-09-13 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `wobert_chinese_plus` is a Chinese model originally trained by qinluo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wobert_chinese_plus_zh_5.1.1_3.0_1694565904418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wobert_chinese_plus_zh_5.1.1_3.0_1694565904418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("wobert_chinese_plus","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("wobert_chinese_plus", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wobert_chinese_plus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|464.5 MB| + +## References + +https://huggingface.co/qinluo/wobert-chinese-plus \ No newline at end of file