From b9d05223d240bccc2cb4f71132206013507cad9b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:35:13 +0700 Subject: [PATCH 001/301] Add model 2023-10-25-bert_ft_qqp_79_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_79_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_79_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_79_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_79_jeevesh8_en.md new file mode 100644 index 00000000000000..4c5c45e5978045 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_79_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_79_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_79_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_79_jeevesh8` is an English model originally trained by Jeevesh8. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_79_jeevesh8_en_5.1.4_3.4_1698219225696.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_79_jeevesh8_en_5.1.4_3.4_1698219225696.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_79_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_79_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_79_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-79 \ No newline at end of file From 76028b1e7902702bdaa12e9dfcdbc59beed78847 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:36:13 +0700 Subject: [PATCH 002/301] Add model 2023-10-25-covid_twitter_bert_v2_1_4_2e_05_0_01_en --- ...covid_twitter_bert_v2_1_4_2e_05_0_01_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_1_4_2e_05_0_01_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_1_4_2e_05_0_01_en.md b/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_1_4_2e_05_0_01_en.md new file mode 100644 index 00000000000000..266876cec82bdd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_1_4_2e_05_0_01_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English covid_twitter_bert_v2_1_4_2e_05_0_01 BertForSequenceClassification from JerryYanJiang +author: John Snow Labs +name: covid_twitter_bert_v2_1_4_2e_05_0_01 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid_twitter_bert_v2_1_4_2e_05_0_01` is an English model originally trained by JerryYanJiang. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_twitter_bert_v2_1_4_2e_05_0_01_en_5.1.4_3.4_1698219195720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_twitter_bert_v2_1_4_2e_05_0_01_en_5.1.4_3.4_1698219195720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("covid_twitter_bert_v2_1_4_2e_05_0_01","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("covid_twitter_bert_v2_1_4_2e_05_0_01","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_twitter_bert_v2_1_4_2e_05_0_01| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/JerryYanJiang/covid-twitter-bert-v2_1_4_2e-05_0.01 \ No newline at end of file From dfc22a207d43a8a590528bc6f87cec7ea436944d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:37:14 +0700 Subject: [PATCH 003/301] Add model 2023-10-25-bert_base_uncased_ade_ade_corpus_v2_en --- ...-bert_base_uncased_ade_ade_corpus_v2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ade_ade_corpus_v2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ade_ade_corpus_v2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ade_ade_corpus_v2_en.md new file mode 100644 index 00000000000000..78fae46dc05da7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ade_ade_corpus_v2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_ade_ade_corpus_v2 BertForSequenceClassification from Jorgeutd +author: John Snow Labs +name: bert_base_uncased_ade_ade_corpus_v2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_ade_ade_corpus_v2` is an English model originally trained by Jorgeutd. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ade_ade_corpus_v2_en_5.1.4_3.4_1698219322282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ade_ade_corpus_v2_en_5.1.4_3.4_1698219322282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_ade_ade_corpus_v2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_ade_ade_corpus_v2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ade_ade_corpus_v2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jorgeutd/bert-base-uncased-ade-Ade-corpus-v2 \ No newline at end of file From b0a64ed90e869768a57dd34e2716450b106eae0f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:38:14 +0700 Subject: [PATCH 004/301] Add model 2023-10-25-bert_ft_qqp_80_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_80_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_80_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_80_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_80_jeevesh8_en.md new file mode 100644 index 00000000000000..d6625282269809 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_80_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_80_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_80_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_80_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_80_jeevesh8_en_5.1.4_3.4_1698219407321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_80_jeevesh8_en_5.1.4_3.4_1698219407321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_80_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_80_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_80_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-80 \ No newline at end of file From 1ee9ff508816ed10a0490de3e76458f1ba525bdb Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:39:14 +0700 Subject: [PATCH 005/301] Add model 2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en --- ...news_market_overview_open_ssec_f1_v1_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md new file mode 100644 index 00000000000000..7609c326cdf263 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained 
BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en_5.1.4_3.4_1698219379071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en_5.1.4_3.4_1698219379071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.1 MB| + +## References + +https://huggingface.co/hw2942/mengzi-bert-base-fin-wallstreetcn-morning-news-market-overview-open-SSEC-f1-v1 \ No newline at end of file From 74b011471254334e8f07c713e43b45d5496e09a8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:40:14 +0700 Subject: [PATCH 006/301] Add model 2023-10-25-bert_sdg_french_fr --- .../2023-10-25-bert_sdg_french_fr.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sdg_french_fr.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sdg_french_fr.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sdg_french_fr.md new file mode 100644 index 00000000000000..ada1dbaeb13105 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sdg_french_fr.md @@ -0,0 +1,97 @@ +--- +layout: model +title: French bert_sdg_french BertForSequenceClassification from ilovebots +author: John Snow Labs +name: bert_sdg_french +date: 2023-10-25 +tags: [bert, fr, open_source, sequence_classification, onnx] +task: Text Classification +language: fr +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sdg_french` is a French model originally trained by ilovebots. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sdg_french_fr_5.1.4_3.4_1698214647394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sdg_french_fr_5.1.4_3.4_1698214647394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sdg_french","fr")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sdg_french","fr") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sdg_french| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|fr| +|Size:|414.6 MB| + +## References + +https://huggingface.co/ilovebots/bert-sdg-french \ No newline at end of file From 2696348f61f9f3a32a179e23312ab9c7f38793ae Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:41:14 +0700 Subject: [PATCH 007/301] Add model 2023-10-25-bert_ft_qqp_81_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_81_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_81_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_81_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_81_jeevesh8_en.md new file mode 100644 index 00000000000000..ed95812dd8dbe0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_81_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_81_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_81_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_81_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_81_jeevesh8_en_5.1.4_3.4_1698219587212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_81_jeevesh8_en_5.1.4_3.4_1698219587212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_81_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_81_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_81_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-81 \ No newline at end of file From 188d75a22fa58942f1c6657b2cbc222bc521fd77 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:42:15 +0700 Subject: [PATCH 008/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en --- ...news_market_overview_open_ssec_f1_v1_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md new file mode 100644 index 00000000000000..4ec59ff1d51af6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, 
adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en_5.1.4_3.4_1698219578005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1_en_5.1.4_3.4_1698219578005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v1| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-open-SSEC-f1-v1 \ No newline at end of file From ff2acfc06e489af1d7cd2255ec8cbd2a1272299b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:43:15 +0700 Subject: [PATCH 009/301] Add model 2023-10-25-bert_base_uncased_fine_tuned_on_clinc_oos_dataset_en --- ...ased_fine_tuned_on_clinc_oos_dataset_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_fine_tuned_on_clinc_oos_dataset_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_fine_tuned_on_clinc_oos_dataset_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_fine_tuned_on_clinc_oos_dataset_en.md new file mode 100644 index 00000000000000..abc4019940e9ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_fine_tuned_on_clinc_oos_dataset_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_fine_tuned_on_clinc_oos_dataset BertForSequenceClassification from itzo +author: John Snow Labs +name: bert_base_uncased_fine_tuned_on_clinc_oos_dataset +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and 
production-readiness using Spark NLP. `bert_base_uncased_fine_tuned_on_clinc_oos_dataset` is an English model originally trained by itzo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_fine_tuned_on_clinc_oos_dataset_en_5.1.4_3.4_1698212599389.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_fine_tuned_on_clinc_oos_dataset_en_5.1.4_3.4_1698212599389.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_fine_tuned_on_clinc_oos_dataset","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_fine_tuned_on_clinc_oos_dataset","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_fine_tuned_on_clinc_oos_dataset| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/itzo/bert-base-uncased-fine-tuned-on-clinc_oos-dataset \ No newline at end of file From fbb32cbd197e6093c175a58024b4603d62f7ae87 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:44:15 +0700 Subject: [PATCH 010/301] Add model 2023-10-25-bert_ft_qqp_33_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_33_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_33_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_33_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_33_jeevesh8_en.md new file mode 100644 index 00000000000000..f1e443cdd98700 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_33_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_33_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_33_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_33_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_33_jeevesh8_en_5.1.4_3.4_1698210374266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_33_jeevesh8_en_5.1.4_3.4_1698210374266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_33_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_33_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_33_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-33 \ No newline at end of file From ecd50778f9c50fb8cf19c6bc647a13a38508a3da Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:45:15 +0700 Subject: [PATCH 011/301] Add model 2023-10-25-bert_cn_finetunning_jovenpai_en --- ...3-10-25-bert_cn_finetunning_jovenpai_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_cn_finetunning_jovenpai_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_cn_finetunning_jovenpai_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_cn_finetunning_jovenpai_en.md new file mode 100644 index 00000000000000..05260f273ec9ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_cn_finetunning_jovenpai_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_cn_finetunning_jovenpai BertForSequenceClassification from JovenPai +author: John Snow Labs +name: bert_cn_finetunning_jovenpai +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_cn_finetunning_jovenpai` is a English model originally trained by JovenPai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cn_finetunning_jovenpai_en_5.1.4_3.4_1698219841571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cn_finetunning_jovenpai_en_5.1.4_3.4_1698219841571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_cn_finetunning_jovenpai","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_cn_finetunning_jovenpai","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cn_finetunning_jovenpai| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/JovenPai/bert_cn_finetunning \ No newline at end of file From 3509980ffcd523bd0f09d3bf99143771d05cd1e8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:46:15 +0700 Subject: [PATCH 012/301] Add model 2023-10-25-bert_ft_qqp_24_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_24_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_24_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_24_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_24_jeevesh8_en.md new file mode 100644 index 00000000000000..148e66ddfc49ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_24_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_24_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_24_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_24_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_24_jeevesh8_en_5.1.4_3.4_1698208744152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_24_jeevesh8_en_5.1.4_3.4_1698208744152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_24_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_24_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_24_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-24 \ No newline at end of file From 266c01eeb8c5c980206b6478005ee8f445c1cdbc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:47:16 +0700 Subject: [PATCH 013/301] Add model 2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_afaji_en --- ...i_basic_with_indobert_large_p2_afaji_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_afaji_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_afaji_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_afaji_en.md new file mode 100644 index 00000000000000..60ae6ef7cfe365 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_afaji_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_basic_with_indobert_large_p2_afaji BertForSequenceClassification from afaji +author: John Snow Labs +name: fine_tuned_indonli_basic_with_indobert_large_p2_afaji +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_indonli_basic_with_indobert_large_p2_afaji` is 
an English model originally trained by afaji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_large_p2_afaji_en_5.1.4_3.4_1698219934268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_large_p2_afaji_en_5.1.4_3.4_1698219934268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_large_p2_afaji","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_large_p2_afaji","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_basic_with_indobert_large_p2_afaji| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/afaji/fine-tuned-IndoNLI-Basic-with-indobert-large-p2 \ No newline at end of file From bdc1244cf6938d7e30798c5a6b310afea2001191 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:48:16 +0700 Subject: [PATCH 014/301] Add model 2023-10-25-bert_base_uncased_finetuned_mrpc_hilariooliveira_en --- ...cased_finetuned_mrpc_hilariooliveira_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_hilariooliveira_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_hilariooliveira_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_hilariooliveira_en.md new file mode 100644 index 00000000000000..7a312b503d735c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_hilariooliveira_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_mrpc_hilariooliveira BertForSequenceClassification from hilariooliveira +author: John Snow Labs +name: bert_base_uncased_finetuned_mrpc_hilariooliveira +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `bert_base_uncased_finetuned_mrpc_hilariooliveira` is an English model originally trained by hilariooliveira. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_mrpc_hilariooliveira_en_5.1.4_3.4_1698219891498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_mrpc_hilariooliveira_en_5.1.4_3.4_1698219891498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_mrpc_hilariooliveira","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_mrpc_hilariooliveira","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_mrpc_hilariooliveira| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/hilariooliveira/bert-base-uncased-finetuned-mrpc \ No newline at end of file From 3c978246f83bde5a0d7994c3cafd9d3322a06342 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:49:16 +0700 Subject: [PATCH 015/301] Add model 2023-10-25-pipp_finder_bert_base_cased_en --- ...23-10-25-pipp_finder_bert_base_cased_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-pipp_finder_bert_base_cased_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-pipp_finder_bert_base_cased_en.md b/docs/_posts/ahmedlone127/2023-10-25-pipp_finder_bert_base_cased_en.md new file mode 100644 index 00000000000000..2ce48d13b73d85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-pipp_finder_bert_base_cased_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English pipp_finder_bert_base_cased BertForSequenceClassification from cgpotts +author: John Snow Labs +name: pipp_finder_bert_base_cased +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pipp_finder_bert_base_cased` is a English model originally trained by cgpotts. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pipp_finder_bert_base_cased_en_5.1.4_3.4_1698220093324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pipp_finder_bert_base_cased_en_5.1.4_3.4_1698220093324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("pipp_finder_bert_base_cased","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("pipp_finder_bert_base_cased","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pipp_finder_bert_base_cased| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/cgpotts/pipp-finder-bert-base-cased \ No newline at end of file From f0df82bb1a94a716c827a181a4e4b59976b44cf0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:50:17 +0700 Subject: [PATCH 016/301] Add model 2023-10-25-vashkontrol_sentiment_rubert_ru --- ...3-10-25-vashkontrol_sentiment_rubert_ru.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-vashkontrol_sentiment_rubert_ru.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-vashkontrol_sentiment_rubert_ru.md b/docs/_posts/ahmedlone127/2023-10-25-vashkontrol_sentiment_rubert_ru.md new file mode 100644 index 00000000000000..2c70fdca761862 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-vashkontrol_sentiment_rubert_ru.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Russian vashkontrol_sentiment_rubert BertForSequenceClassification from kartashoffv +author: John Snow Labs +name: vashkontrol_sentiment_rubert +date: 2023-10-25 +tags: [bert, ru, open_source, sequence_classification, onnx] +task: Text Classification +language: ru +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vashkontrol_sentiment_rubert` is a Russian model originally trained by kartashoffv. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vashkontrol_sentiment_rubert_ru_5.1.4_3.4_1698206312526.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vashkontrol_sentiment_rubert_ru_5.1.4_3.4_1698206312526.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("vashkontrol_sentiment_rubert","ru")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("vashkontrol_sentiment_rubert","ru") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vashkontrol_sentiment_rubert| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|ru| +|Size:|666.5 MB| + +## References + +https://huggingface.co/kartashoffv/vashkontrol-sentiment-rubert \ No newline at end of file From 2f401a609caa41ffd2e973d60eef3647b8747743 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:51:17 +0700 Subject: [PATCH 017/301] Add model 2023-10-25-goog_bert_ft_cola_34_en --- .../2023-10-25-goog_bert_ft_cola_34_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_34_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_34_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_34_en.md new file mode 100644 index 00000000000000..d1e5636311de44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_34_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_34 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_34 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_34` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_34_en_5.1.4_3.4_1698220231904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_34_en_5.1.4_3.4_1698220231904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_34","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_34","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_34| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-34 \ No newline at end of file From b4ebceed8d8b0e79474bb0c7a3047663b8255967 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:52:17 +0700 Subject: [PATCH 018/301] Add model 2023-10-25-bert_ft_qqp_73_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_73_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_73_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_73_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_73_jeevesh8_en.md new file mode 100644 index 00000000000000..44602a69184f7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_73_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_73_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_73_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_73_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_73_jeevesh8_en_5.1.4_3.4_1698218011258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_73_jeevesh8_en_5.1.4_3.4_1698218011258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_73_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_73_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_73_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-73 \ No newline at end of file From 18c699bde8398e9d2cce4e7f9469ca9eec10d6d4 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:53:17 +0700 Subject: [PATCH 019/301] Add model 2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_afaji_en --- ...ted_with_indobert_base_uncased_afaji_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_afaji_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_afaji_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_afaji_en.md new file mode 100644 index 00000000000000..b6bb021fec9193 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_afaji_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_translated_with_indobert_base_uncased_afaji BertForSequenceClassification from afaji +author: John Snow Labs +name: fine_tuned_indonli_translated_with_indobert_base_uncased_afaji +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `fine_tuned_indonli_translated_with_indobert_base_uncased_afaji` is an English model originally trained by afaji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_translated_with_indobert_base_uncased_afaji_en_5.1.4_3.4_1698219456786.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_translated_with_indobert_base_uncased_afaji_en_5.1.4_3.4_1698219456786.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_translated_with_indobert_base_uncased_afaji","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_translated_with_indobert_base_uncased_afaji","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_translated_with_indobert_base_uncased_afaji| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|413.9 MB| + +## References + +https://huggingface.co/afaji/fine-tuned-IndoNLI-Translated-with-indobert-base-uncased \ No newline at end of file From a8ced37fc56e11f4f30e8c360cebcfdbc34b2651 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:54:17 +0700 Subject: [PATCH 020/301] Add model 2023-10-25-bert_finetunning_test_itcastai_en --- ...10-25-bert_finetunning_test_itcastai_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_itcastai_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_itcastai_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_itcastai_en.md new file mode 100644 index 00000000000000..9340812df09e45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_itcastai_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_finetunning_test_itcastai BertForSequenceClassification from ItcastAI +author: John Snow Labs +name: bert_finetunning_test_itcastai +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetunning_test_itcastai` is a English model originally trained by ItcastAI. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetunning_test_itcastai_en_5.1.4_3.4_1698211833653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetunning_test_itcastai_en_5.1.4_3.4_1698211833653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetunning_test_itcastai","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetunning_test_itcastai","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetunning_test_itcastai| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/ItcastAI/bert_finetunning_test \ No newline at end of file From 368e6f40aa46660c46e1ae4f775a3d78523a4765 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:55:17 +0700 Subject: [PATCH 021/301] Add model 2023-10-25-bert_sequence_classifier_base_uncased_finetuned_surveyclassification_en --- ...cased_finetuned_surveyclassification_en.md | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_finetuned_surveyclassification_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_finetuned_surveyclassification_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_finetuned_surveyclassification_en.md new file mode 100644 index 00000000000000..49d58adbd7092c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_finetuned_surveyclassification_en.md @@ -0,0 +1,100 @@ +--- +layout: model +title: English BertForSequenceClassification Base Uncased model (from Jorgeutd) +author: John Snow Labs +name: bert_sequence_classifier_base_uncased_finetuned_surveyclassification +date: 2023-10-25 +tags: [en, open_source, bert, sequence_classification, ner, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and 
production-readiness using Spark NLP. `bert-base-uncased-finetuned-surveyclassification` is an English model originally trained by `Jorgeutd`. + +## Predicted Entities + +`positive`, `neutral`, `negative` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_base_uncased_finetuned_surveyclassification_en_5.1.4_3.4_1698219648617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_base_uncased_finetuned_surveyclassification_en_5.1.4_3.4_1698219648617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_base_uncased_finetuned_surveyclassification","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_base_uncased_finetuned_surveyclassification","en") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_base_uncased_finetuned_surveyclassification| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|409.4 MB| +|Case sensitive:|true| +|Max sentence length:|128| + +## References + +References + +- https://huggingface.co/Jorgeutd/bert-base-uncased-finetuned-surveyclassification \ No newline at end of file From 387783db695cebcaa53ad9fe1121539d04520fbe Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:56:18 +0700 Subject: [PATCH 022/301] Add model 2023-10-25-bert_ft_qqp_85_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_85_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_85_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_85_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_85_jeevesh8_en.md new file mode 100644 index 00000000000000..b6b91b836dafc4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_85_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_85_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_85_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_85_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_85_jeevesh8_en_5.1.4_3.4_1698220376391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_85_jeevesh8_en_5.1.4_3.4_1698220376391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_85_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_85_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_85_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-85 \ No newline at end of file From 6789a0e946e14de882a361b1c8d646e903a7390b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:57:18 +0700 Subject: [PATCH 023/301] Add model 2023-10-25-bert_ft_qqp_6_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_6_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_6_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_6_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_6_jeevesh8_en.md new file mode 100644 index 00000000000000..68ec5ec38c6f42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_6_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_6_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_6_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_6_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_6_jeevesh8_en_5.1.4_3.4_1698205246513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_6_jeevesh8_en_5.1.4_3.4_1698205246513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_6_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_6_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_6_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-6 \ No newline at end of file From 80a8275afabf17e9e080cf9a3fa7eb6209463c70 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:58:18 +0700 Subject: [PATCH 024/301] Add model 2023-10-25-bert_base_uncased_finetuned_iemocap2_en --- ...bert_base_uncased_finetuned_iemocap2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap2_en.md new file mode 100644 index 00000000000000..e36ecf8fb073f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_iemocap2 BertForSequenceClassification from Zahra99 +author: John Snow Labs +name: bert_base_uncased_finetuned_iemocap2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_iemocap2` is a English model originally trained by Zahra99. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_iemocap2_en_5.1.4_3.4_1698214367259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_iemocap2_en_5.1.4_3.4_1698214367259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_iemocap2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_iemocap2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_iemocap2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Zahra99/bert-base-uncased-finetuned-iemocap2 \ No newline at end of file From d9b4a3339e72807352c769d879cc6d5b3a6f7c99 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 14:59:19 +0700 Subject: [PATCH 025/301] Add model 2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2_en --- ...ng_news_market_overview_open_ssec_v2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md new file mode 100644 index 00000000000000..db46416dce0017 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained 
BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2_en_5.1.4_3.4_1698220731080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2_en_5.1.4_3.4_1698220731080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_v2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.1 MB| + +## References + +https://huggingface.co/hw2942/mengzi-bert-base-fin-wallstreetcn-morning-news-market-overview-open-SSEC-v2 \ No newline at end of file From a8e73e8f16f9245f02cfb181f16324f3db461641 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:00:19 +0700 Subject: [PATCH 026/301] Add model 2023-10-25-bert_ft_qqp_87_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_87_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_87_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_87_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_87_jeevesh8_en.md new file mode 100644 index 00000000000000..bec5d19ff4e595 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_87_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_87_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_87_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_87_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_87_jeevesh8_en_5.1.4_3.4_1698220763383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_87_jeevesh8_en_5.1.4_3.4_1698220763383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_87_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_87_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_87_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-87 \ No newline at end of file From a92c57a321dc48942d72f6b951c89a819bc3e330 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:01:19 +0700 Subject: [PATCH 027/301] Add model 2023-10-25-goog_bert_ft_cola_41_en --- .../2023-10-25-goog_bert_ft_cola_41_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_41_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_41_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_41_en.md new file mode 100644 index 00000000000000..98b5a18461d1ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_41_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_41 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_41 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_41` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_41_en_5.1.4_3.4_1698220788822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_41_en_5.1.4_3.4_1698220788822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_41","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_41","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_41| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-41 \ No newline at end of file From c08196e57547e18e3f490d121efa60fb3775f19c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:02:19 +0700 Subject: [PATCH 028/301] Add model 2023-10-25-bert_pre_doc_en --- .../2023-10-25-bert_pre_doc_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_pre_doc_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_pre_doc_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_pre_doc_en.md new file mode 100644 index 00000000000000..75e5c6f1d1b618 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_pre_doc_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_pre_doc BertForSequenceClassification from LilaBoualili +author: John Snow Labs +name: bert_pre_doc +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pre_doc` is an English model originally trained by LilaBoualili. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pre_doc_en_5.1.4_3.4_1698220828401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pre_doc_en_5.1.4_3.4_1698220828401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_pre_doc","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_pre_doc","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pre_doc| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/LilaBoualili/bert-pre-doc \ No newline at end of file From 9930192b590a5c2da1895d8a9320c122ee447835 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:03:19 +0700 Subject: [PATCH 029/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2_en --- ...ng_news_market_overview_open_ssec_v2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md new file mode 100644 index 00000000000000..ad82bdcfa957ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and 
curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2_en_5.1.4_3.4_1698220941999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2_en_5.1.4_3.4_1698220941999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_v2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-open-SSEC-v2 \ No newline at end of file From c35aff19af68b85d893a95920a37cc2850706438 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:04:19 +0700 Subject: [PATCH 030/301] Add model 2023-10-25-bert_pre_pair_en --- .../2023-10-25-bert_pre_pair_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_pre_pair_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_pre_pair_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_pre_pair_en.md new file mode 100644 index 00000000000000..8c09fb2a1ae7e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_pre_pair_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_pre_pair BertForSequenceClassification from LilaBoualili +author: John Snow Labs +name: bert_pre_pair +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pre_pair` is an English model originally trained by LilaBoualili. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pre_pair_en_5.1.4_3.4_1698221017879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pre_pair_en_5.1.4_3.4_1698221017879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_pre_pair","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_pre_pair","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pre_pair| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/LilaBoualili/bert-pre-pair \ No newline at end of file From 780d1801619f6803cd40b5af919b646e78526881 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:05:19 +0700 Subject: [PATCH 031/301] Add model 2023-10-25-goog_bert_ft_cola_38_en --- .../2023-10-25-goog_bert_ft_cola_38_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_38_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_38_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_38_en.md new file mode 100644 index 00000000000000..b82bfd6d3288aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_38_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_38 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_38 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_38` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_38_en_5.1.4_3.4_1698220977338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_38_en_5.1.4_3.4_1698220977338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_38","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_38","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_38| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-38 \ No newline at end of file From 3fe81122f33c1501489e069beef619753bd8e9cc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:06:20 +0700 Subject: [PATCH 032/301] Add model 2023-10-25-covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01_en --- ..._bert_v2_epoch3_batch4_lr2e_05_w0_01_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01_en.md b/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01_en.md new file mode 100644 index 00000000000000..5376352c5b5c55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01 BertForSequenceClassification from JerryYanJiang +author: John Snow Labs +name: covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01` is an English model 
originally trained by JerryYanJiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01_en_5.1.4_3.4_1698217732233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01_en_5.1.4_3.4_1698217732233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_twitter_bert_v2_epoch3_batch4_lr2e_05_w0_01| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/JerryYanJiang/covid-twitter-bert-v2_epoch3_batch4_lr2e-05_w0.01 \ No newline at end of file From 835055fb138c4ec27dec3e1bd3111e9d69e93f88 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:07:20 +0700 Subject: [PATCH 033/301] Add model 2023-10-25-bert_sequence_classifier_dehate_mono_portugese_pt --- ...nce_classifier_dehate_mono_portugese_pt.md | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_dehate_mono_portugese_pt.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_dehate_mono_portugese_pt.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_dehate_mono_portugese_pt.md new file mode 100644 index 00000000000000..5d054e173420dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_dehate_mono_portugese_pt.md @@ -0,0 +1,102 @@ +--- +layout: model +title: Portuguese BertForSequenceClassification Cased model (from Hate-speech-CNERG) +author: John Snow Labs +name: bert_sequence_classifier_dehate_mono_portugese +date: 2023-10-25 +tags: [pt, open_source, bert, sequence_classification, ner, onnx] +task: Text Classification +language: pt +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. 
`dehatebert-mono-portugese` is a Portuguese model originally trained by `Hate-speech-CNERG`. + +## Predicted Entities + +`NON_HATE`, `HATE` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_dehate_mono_portugese_pt_5.1.4_3.4_1698208710232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_dehate_mono_portugese_pt_5.1.4_3.4_1698208710232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_dehate_mono_portugese","pt") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_dehate_mono_portugese","pt") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_dehate_mono_portugese| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|pt| +|Size:|627.7 MB| +|Case sensitive:|true| +|Max sentence length:|128| + +## References + +References + +- https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-portugese +- https://github.com/punyajoy/DE-LIMIT +- https://arxiv.org/abs/2004.06465 \ No newline at end of file From 428636b6706464a4baaac39305bd076cb8cf9610 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:08:20 +0700 Subject: [PATCH 034/301] Add model 2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji_en --- ...ted_with_indobert_base_uncased_afaji_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji_en.md new file mode 100644 index 00000000000000..3980e5051c15b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji BertForSequenceClassification from afaji +author: John Snow Labs +name: fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: 
"Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji` is a English model originally trained by afaji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji_en_5.1.4_3.4_1698221048030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji_en_5.1.4_3.4_1698221048030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_augmented_with_indobert_base_uncased_afaji| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|413.9 MB| + +## References + +https://huggingface.co/afaji/fine-tuned-IndoNLI-Augmented-with-indobert-base-uncased \ No newline at end of file From 8daa75bbb0831f330a55b34aa980509cac924e78 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:09:20 +0700 Subject: [PATCH 035/301] Add model 2023-10-25-bert_base_uncased_finetuned_iemocap4_en --- ...bert_base_uncased_finetuned_iemocap4_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap4_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap4_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap4_en.md new file mode 100644 index 00000000000000..cd56cc076f64b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap4_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_iemocap4 BertForSequenceClassification from Zahra99 +author: John Snow Labs +name: bert_base_uncased_finetuned_iemocap4 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_iemocap4` is an English model originally trained by Zahra99. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_iemocap4_en_5.1.4_3.4_1698215069499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_iemocap4_en_5.1.4_3.4_1698215069499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_iemocap4","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_iemocap4","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_iemocap4| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Zahra99/bert-base-uncased-finetuned-iemocap4 \ No newline at end of file From 7ff436bd0d4ce4c3e6d770752dc39ae92a0d8784 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:10:21 +0700 Subject: [PATCH 036/301] Add model 2023-10-25-bert_base_uncased_regression_edmunds_car_reviews_en --- ...cased_regression_edmunds_car_reviews_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_regression_edmunds_car_reviews_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_regression_edmunds_car_reviews_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_regression_edmunds_car_reviews_en.md new file mode 100644 index 00000000000000..d2d2a42e9df806 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_regression_edmunds_car_reviews_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_regression_edmunds_car_reviews BertForSequenceClassification from DunnBC22 +author: John Snow Labs +name: bert_base_uncased_regression_edmunds_car_reviews +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_regression_edmunds_car_reviews` is a 
English model originally trained by DunnBC22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_regression_edmunds_car_reviews_en_5.1.4_3.4_1698210378387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_regression_edmunds_car_reviews_en_5.1.4_3.4_1698210378387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_regression_edmunds_car_reviews","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_regression_edmunds_car_reviews","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_regression_edmunds_car_reviews| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/DunnBC22/bert-base-uncased-Regression-Edmunds_Car_Reviews \ No newline at end of file From f2062d793b73985c4d9d509f1cdebee95abdbcd6 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:11:21 +0700 Subject: [PATCH 037/301] Add model 2023-10-25-bert_classifier_autotrain_wikipedia_sst_2_1034235509_en --- ...autotrain_wikipedia_sst_2_1034235509_en.md | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_autotrain_wikipedia_sst_2_1034235509_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_autotrain_wikipedia_sst_2_1034235509_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_autotrain_wikipedia_sst_2_1034235509_en.md new file mode 100644 index 00000000000000..d4bceb039d04a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_autotrain_wikipedia_sst_2_1034235509_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English BertForSequenceClassification Cased model (from deepesh0x) +author: John Snow Labs +name: bert_classifier_autotrain_wikipedia_sst_2_1034235509 +date: 2023-10-25 +tags: [bert, sequence_classification, classification, open_source, en, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. 
`autotrain-bert_wikipedia_sst_2-1034235509` is an English model originally trained by `deepesh0x`. + +## Predicted Entities + +`negative`, `positive` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_autotrain_wikipedia_sst_2_1034235509_en_5.1.4_3.4_1698212046365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_autotrain_wikipedia_sst_2_1034235509_en_5.1.4_3.4_1698212046365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_autotrain_wikipedia_sst_2_1034235509","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer,sequenceClassifier_loaded]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_autotrain_wikipedia_sst_2_1034235509","en") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer,sequenceClassifier_loaded)) + +val data = Seq("PUT YOUR STRING HERE").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.classify.bert.by_deepesh0x").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_autotrain_wikipedia_sst_2_1034235509| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| +|Case sensitive:|true| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/deepesh0x/autotrain-bert_wikipedia_sst_2-1034235509 \ No newline at end of file From 94309b661a67ead05f0de5c465aad37fa163c5e8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:12:21 +0700 Subject: [PATCH 038/301] Add model 2023-10-25-bert_ft_qqp_91_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_91_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_91_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_91_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_91_jeevesh8_en.md new file mode 100644 index 00000000000000..5d572e035ecb56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_91_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_91_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_91_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_91_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_91_jeevesh8_en_5.1.4_3.4_1698221523166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_91_jeevesh8_en_5.1.4_3.4_1698221523166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_91_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_91_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_91_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-91 \ No newline at end of file From ad1ad6346ea412dfe1072b3040289d6aa2fd4add Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:13:21 +0700 Subject: [PATCH 039/301] Add model 2023-10-25-boss_toxicity_bert_base_uncased_en --- ...0-25-boss_toxicity_bert_base_uncased_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-boss_toxicity_bert_base_uncased_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-boss_toxicity_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-10-25-boss_toxicity_bert_base_uncased_en.md new file mode 100644 index 00000000000000..376157fbf627b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-boss_toxicity_bert_base_uncased_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English boss_toxicity_bert_base_uncased BertForSequenceClassification from Kyle1668 +author: John Snow Labs +name: boss_toxicity_bert_base_uncased +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`boss_toxicity_bert_base_uncased` is a English model originally trained by Kyle1668. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/boss_toxicity_bert_base_uncased_en_5.1.4_3.4_1698221527242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/boss_toxicity_bert_base_uncased_en_5.1.4_3.4_1698221527242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("boss_toxicity_bert_base_uncased","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("boss_toxicity_bert_base_uncased","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|boss_toxicity_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Kyle1668/boss-toxicity-bert-base-uncased \ No newline at end of file From 38d63c5c5f62190443c1f22bb6e55adf41d6f5dd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:14:21 +0700 Subject: [PATCH 040/301] Add model 2023-10-25-goog_bert_ft_cola_45_en --- .../2023-10-25-goog_bert_ft_cola_45_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_45_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_45_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_45_en.md new file mode 100644 index 00000000000000..9e80699e8a0ae9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_45_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_45 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_45 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_45` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_45_en_5.1.4_3.4_1698221572355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_45_en_5.1.4_3.4_1698221572355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_45","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_45","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_45| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-45 \ No newline at end of file From 6db9490e6209942021573eec7d049598648dd3ff Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:15:22 +0700 Subject: [PATCH 041/301] Add model 2023-10-25-bert_vanilla_en --- .../2023-10-25-bert_vanilla_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_vanilla_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_vanilla_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_vanilla_en.md new file mode 100644 index 00000000000000..b0cec13a6b38c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_vanilla_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_vanilla BertForSequenceClassification from LilaBoualili +author: John Snow Labs +name: bert_vanilla +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_vanilla` is a English model originally trained by LilaBoualili. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_vanilla_en_5.1.4_3.4_1698221552184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_vanilla_en_5.1.4_3.4_1698221552184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_vanilla","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_vanilla","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_vanilla| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/LilaBoualili/bert-vanilla \ No newline at end of file From 10660537ebd31b28581b3f2838b40f785f0d1fb6 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:16:22 +0700 Subject: [PATCH 042/301] Add model 2023-10-25-bert_ft_qqp_55_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_55_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_55_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_55_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_55_jeevesh8_en.md new file mode 100644 index 00000000000000..010d33b0f4687c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_55_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_55_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_55_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_55_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_55_jeevesh8_en_5.1.4_3.4_1698214456511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_55_jeevesh8_en_5.1.4_3.4_1698214456511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_55_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_55_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_55_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-55 \ No newline at end of file From 9cf34727a8d0092789bc189c7e75e957ae4bf4b9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:17:22 +0700 Subject: [PATCH 043/301] Add model 2023-10-25-ag_news_bert_base_uncased_en --- ...2023-10-25-ag_news_bert_base_uncased_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-ag_news_bert_base_uncased_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-ag_news_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-10-25-ag_news_bert_base_uncased_en.md new file mode 100644 index 00000000000000..716d441fbee0e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-ag_news_bert_base_uncased_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English ag_news_bert_base_uncased BertForSequenceClassification from Kyle1668 +author: John Snow Labs +name: ag_news_bert_base_uncased +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ag_news_bert_base_uncased` is a English model originally trained by Kyle1668. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ag_news_bert_base_uncased_en_5.1.4_3.4_1698221722413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ag_news_bert_base_uncased_en_5.1.4_3.4_1698221722413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("ag_news_bert_base_uncased","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("ag_news_bert_base_uncased","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ag_news_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Kyle1668/ag-news-bert-base-uncased \ No newline at end of file From c17ed3180f8650ee7416fb225953e6843ba31bdb Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:18:22 +0700 Subject: [PATCH 044/301] Add model 2023-10-25-bert_base_uncased_finetuned_sufficiency_ukp_balanced_en --- ...d_finetuned_sufficiency_ukp_balanced_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_sufficiency_ukp_balanced_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_sufficiency_ukp_balanced_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_sufficiency_ukp_balanced_en.md new file mode 100644 index 00000000000000..f721c063d5e0af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_sufficiency_ukp_balanced_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_sufficiency_ukp_balanced BertForSequenceClassification from jakub014 +author: John Snow Labs +name: bert_base_uncased_finetuned_sufficiency_ukp_balanced +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `bert_base_uncased_finetuned_sufficiency_ukp_balanced` is an English model originally trained by jakub014. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_sufficiency_ukp_balanced_en_5.1.4_3.4_1698218620317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_sufficiency_ukp_balanced_en_5.1.4_3.4_1698218620317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_sufficiency_ukp_balanced","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_sufficiency_ukp_balanced","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_sufficiency_ukp_balanced| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/jakub014/bert-base-uncased-finetuned-sufficiency-ukp-balanced \ No newline at end of file From db36e04203d4d6e15838b043e2164a3cb981c05d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:19:23 +0700 Subject: [PATCH 045/301] Add model 2023-10-25-autotrain_bert_wikipedia_sst_2_1034235513_en --- ...rain_bert_wikipedia_sst_2_1034235513_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-autotrain_bert_wikipedia_sst_2_1034235513_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-autotrain_bert_wikipedia_sst_2_1034235513_en.md b/docs/_posts/ahmedlone127/2023-10-25-autotrain_bert_wikipedia_sst_2_1034235513_en.md new file mode 100644 index 00000000000000..29142f1ea13ac5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-autotrain_bert_wikipedia_sst_2_1034235513_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English autotrain_bert_wikipedia_sst_2_1034235513 BertForSequenceClassification from deepesh0x +author: John Snow Labs +name: autotrain_bert_wikipedia_sst_2_1034235513 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_bert_wikipedia_sst_2_1034235513` is a English model 
originally trained by deepesh0x. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_bert_wikipedia_sst_2_1034235513_en_5.1.4_3.4_1698212254160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_bert_wikipedia_sst_2_1034235513_en_5.1.4_3.4_1698212254160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("autotrain_bert_wikipedia_sst_2_1034235513","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("autotrain_bert_wikipedia_sst_2_1034235513","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_bert_wikipedia_sst_2_1034235513| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/deepesh0x/autotrain-bert_wikipedia_sst_2-1034235513 \ No newline at end of file From 94c06fb27e39e2c9ae786e2b54fa5665ef7344d5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:20:23 +0700 Subject: [PATCH 046/301] Add model 2023-10-25-bert_ft_qqp_89_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_89_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_89_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_89_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_89_jeevesh8_en.md new file mode 100644 index 00000000000000..154c0757ad2f48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_89_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_89_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_89_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_89_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_89_jeevesh8_en_5.1.4_3.4_1698221153353.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_89_jeevesh8_en_5.1.4_3.4_1698221153353.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_89_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_89_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_89_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-89 \ No newline at end of file From be1d8c7f356b1ea77b21e74a6b86f62998d39b48 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:21:23 +0700 Subject: [PATCH 047/301] Add model 2023-10-25-bert_cased_sst2_finetuned_en --- ...2023-10-25-bert_cased_sst2_finetuned_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_cased_sst2_finetuned_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_cased_sst2_finetuned_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_cased_sst2_finetuned_en.md new file mode 100644 index 00000000000000..98824407c260e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_cased_sst2_finetuned_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_cased_sst2_finetuned BertForSequenceClassification from ElcKeT +author: John Snow Labs +name: bert_cased_sst2_finetuned +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_cased_sst2_finetuned` is an English model originally trained by ElcKeT. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cased_sst2_finetuned_en_5.1.4_3.4_1698221932937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cased_sst2_finetuned_en_5.1.4_3.4_1698221932937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_cased_sst2_finetuned","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_cased_sst2_finetuned","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cased_sst2_finetuned| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ElcKeT/bert-cased-sst2-finetuned \ No newline at end of file From 444958f3a22f2fbd3b5e7a1784f322b856bec740 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:22:23 +0700 Subject: [PATCH 048/301] Add model 2023-10-25-bert_italian_emotion_en --- .../2023-10-25-bert_italian_emotion_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_italian_emotion_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_italian_emotion_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_emotion_en.md new file mode 100644 index 00000000000000..b76d0f2c0bd357 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_emotion_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_italian_emotion BertForSequenceClassification from pysentimiento +author: John Snow Labs +name: bert_italian_emotion +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_italian_emotion` is an English model originally trained by pysentimiento. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_italian_emotion_en_5.1.4_3.4_1698221965080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_italian_emotion_en_5.1.4_3.4_1698221965080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_emotion","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_emotion","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_italian_emotion| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/pysentimiento/bert-it-emotion \ No newline at end of file From dfb0636912d1e8375c780604573e586532ef12cc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:23:23 +0700 Subject: [PATCH 049/301] Add model 2023-10-25-bert_ft_qqp_93_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_93_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_93_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_93_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_93_jeevesh8_en.md new file mode 100644 index 00000000000000..5ad503ad756a17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_93_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_93_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_93_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_93_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_93_jeevesh8_en_5.1.4_3.4_1698221927847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_93_jeevesh8_en_5.1.4_3.4_1698221927847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_93_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_93_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_93_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-93 \ No newline at end of file From ea70b7018114ea6b758c74b29d12ddc09cc2b7d5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:24:23 +0700 Subject: [PATCH 050/301] Add model 2023-10-25-bert_mini_finetuned_mrpc_en --- .../2023-10-25-bert_mini_finetuned_mrpc_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mrpc_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mrpc_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mrpc_en.md new file mode 100644 index 00000000000000..4d6737a21da254 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mrpc_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_mini_finetuned_mrpc BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_mini_finetuned_mrpc +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_finetuned_mrpc` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_mrpc_en_5.1.4_3.4_1698222142012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_mrpc_en_5.1.4_3.4_1698222142012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_mrpc","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_mrpc","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_finetuned_mrpc| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/M-FAC/bert-mini-finetuned-mrpc \ No newline at end of file From 6363679a075727885def2bc1f482a750bcedc03e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:25:24 +0700 Subject: [PATCH 051/301] Add model 2023-10-25-goog_bert_ft_cola_23_en --- .../2023-10-25-goog_bert_ft_cola_23_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_23_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_23_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_23_en.md new file mode 100644 index 00000000000000..10f7b5ba369f76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_23_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_23 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_23 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_23` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_23_en_5.1.4_3.4_1698222310863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_23_en_5.1.4_3.4_1698222310863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_23","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_23","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_23| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-23 \ No newline at end of file From 17c7f3894618abe770632da139a15fa12d949571 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:26:24 +0700 Subject: [PATCH 052/301] Add model 2023-10-25-bert_ft_qqp_68_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_68_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_68_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_68_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_68_jeevesh8_en.md new file mode 100644 index 00000000000000..06c843a9e74b16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_68_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_68_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_68_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_68_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_68_jeevesh8_en_5.1.4_3.4_1698217001117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_68_jeevesh8_en_5.1.4_3.4_1698217001117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_68_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_68_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_68_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-68 \ No newline at end of file From c1bbc2e618f24f74c680cad6383d8d9e8410c65e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:27:24 +0700 Subject: [PATCH 053/301] Add model 2023-10-25-bert_base_uncased_sst_2_32_13_30_en --- ...-25-bert_base_uncased_sst_2_32_13_30_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_30_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_30_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_30_en.md new file mode 100644 index 00000000000000..d62f2db4724571 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_30_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_sst_2_32_13_30 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_base_uncased_sst_2_32_13_30 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sst_2_32_13_30` is an English model originally trained by simonycl. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_32_13_30_en_5.1.4_3.4_1698222331550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_32_13_30_en_5.1.4_3.4_1698222331550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_32_13_30","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_32_13_30","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sst_2_32_13_30| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/simonycl/bert-base-uncased-sst-2-32-13-30 \ No newline at end of file From 2977be9666918bf62c3bc6d0bf76e28daea9fb13 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:28:24 +0700 Subject: [PATCH 054/301] Add model 2023-10-25-boss_sentiment_bert_base_uncased_en --- ...-25-boss_sentiment_bert_base_uncased_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-boss_sentiment_bert_base_uncased_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-boss_sentiment_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-10-25-boss_sentiment_bert_base_uncased_en.md new file mode 100644 index 00000000000000..13522eff0e11e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-boss_sentiment_bert_base_uncased_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English boss_sentiment_bert_base_uncased BertForSequenceClassification from Kyle1668 +author: John Snow Labs +name: boss_sentiment_bert_base_uncased +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `boss_sentiment_bert_base_uncased` is an English model originally trained by Kyle1668. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/boss_sentiment_bert_base_uncased_en_5.1.4_3.4_1698221348012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/boss_sentiment_bert_base_uncased_en_5.1.4_3.4_1698221348012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("boss_sentiment_bert_base_uncased","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("boss_sentiment_bert_base_uncased","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|boss_sentiment_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Kyle1668/boss-sentiment-bert-base-uncased \ No newline at end of file From f7c112b8be9b5878fa980841337309be9e5d4edd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:29:24 +0700 Subject: [PATCH 055/301] Add model 2023-10-25-bert_mini_finetuned_sst2_en --- .../2023-10-25-bert_mini_finetuned_sst2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_sst2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_sst2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_sst2_en.md new file mode 100644 index 00000000000000..5647461caea8d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_sst2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_mini_finetuned_sst2 BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_mini_finetuned_sst2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_finetuned_sst2` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_sst2_en_5.1.4_3.4_1698222448691.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_sst2_en_5.1.4_3.4_1698222448691.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_sst2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_sst2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_finetuned_sst2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/M-FAC/bert-mini-finetuned-sst2 \ No newline at end of file From 9c35c9334e66d6f2bab6120f01830d36ac2449ff Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:30:24 +0700 Subject: [PATCH 056/301] Add model 2023-10-25-bert_ft_qqp_95_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_95_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_95_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_95_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_95_jeevesh8_en.md new file mode 100644 index 00000000000000..aed86d7d67f013 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_95_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_95_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_95_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_95_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_95_jeevesh8_en_5.1.4_3.4_1698222330820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_95_jeevesh8_en_5.1.4_3.4_1698222330820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_95_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_95_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_95_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-95 \ No newline at end of file From c911f8dbb43c3389b69078562af3c932cf128aa2 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:31:25 +0700 Subject: [PATCH 057/301] Add model 2023-10-25-bert_tiny_finetuned_mnli_en --- .../2023-10-25-bert_tiny_finetuned_mnli_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mnli_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mnli_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mnli_en.md new file mode 100644 index 00000000000000..4ce35b5fb49fc7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mnli_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_tiny_finetuned_mnli BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_tiny_finetuned_mnli +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_mnli` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_mnli_en_5.1.4_3.4_1698222635613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_mnli_en_5.1.4_3.4_1698222635613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_mnli","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_mnli","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_mnli| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/M-FAC/bert-tiny-finetuned-mnli \ No newline at end of file From 498c8753bd05771eaf645bc34805450ef7fa6506 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:32:25 +0700 Subject: [PATCH 058/301] Add model 2023-10-25-bert_base_dutch_cased_finetuned_dt_en --- ...5-bert_base_dutch_cased_finetuned_dt_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_dutch_cased_finetuned_dt_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_dutch_cased_finetuned_dt_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_dutch_cased_finetuned_dt_en.md new file mode 100644 index 00000000000000..1ba10f1a6edf3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_dutch_cased_finetuned_dt_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_dutch_cased_finetuned_dt BertForSequenceClassification from ArjanvD95 +author: John Snow Labs +name: bert_base_dutch_cased_finetuned_dt +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_finetuned_dt` is an English model originally trained by ArjanvD95. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_dt_en_5.1.4_3.4_1698212262096.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_dt_en_5.1.4_3.4_1698212262096.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_dutch_cased_finetuned_dt","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_dutch_cased_finetuned_dt","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_finetuned_dt| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.0 MB| + +## References + +https://huggingface.co/ArjanvD95/bert-base-dutch-cased-finetuned-dt \ No newline at end of file From 8f437c103e86046fa518ca8d595bac785251c550 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:33:25 +0700 Subject: [PATCH 059/301] Add model 2023-10-25-bert_tiny_finetuned_mrpc_en --- .../2023-10-25-bert_tiny_finetuned_mrpc_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mrpc_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mrpc_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mrpc_en.md new file mode 100644 index 00000000000000..bb433413792c4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_mrpc_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_tiny_finetuned_mrpc BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_tiny_finetuned_mrpc +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_mrpc` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_mrpc_en_5.1.4_3.4_1698222721542.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_mrpc_en_5.1.4_3.4_1698222721542.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_mrpc","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_mrpc","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_mrpc| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/M-FAC/bert-tiny-finetuned-mrpc \ No newline at end of file From e3b61030aa012a93d9c86a420a337ab632e5a05f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:34:25 +0700 Subject: [PATCH 060/301] Add model 2023-10-25-goog_bert_ft_cola_46_en --- .../2023-10-25-goog_bert_ft_cola_46_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_46_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_46_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_46_en.md new file mode 100644 index 00000000000000..4a5551ef179908 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_46_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_46 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_46 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_46` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_46_en_5.1.4_3.4_1698222695220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_46_en_5.1.4_3.4_1698222695220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_46","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_46","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_46| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-46 \ No newline at end of file From 4f91fdd3774081dfa64f73823f965dba1a49f57b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:35:26 +0700 Subject: [PATCH 061/301] Add model 2023-10-25-bert_tiny_finetuned_qnli_en --- .../2023-10-25-bert_tiny_finetuned_qnli_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qnli_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qnli_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qnli_en.md new file mode 100644 index 00000000000000..4ea066c989469b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qnli_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_tiny_finetuned_qnli BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_tiny_finetuned_qnli +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_qnli` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_qnli_en_5.1.4_3.4_1698222815267.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_qnli_en_5.1.4_3.4_1698222815267.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_qnli","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_qnli","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_qnli| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/M-FAC/bert-tiny-finetuned-qnli \ No newline at end of file From 7577ed1da88eab079decf4e7b020859233846f8a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:36:26 +0700 Subject: [PATCH 062/301] Add model 2023-10-25-dk_emotion_bert_class_en --- .../2023-10-25-dk_emotion_bert_class_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_class_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_class_en.md b/docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_class_en.md new file mode 100644 index 00000000000000..5bb80989303491 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_class_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English dk_emotion_bert_class BertForSequenceClassification from Korsholm22 +author: John Snow Labs +name: dk_emotion_bert_class +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dk_emotion_bert_class` is an English model originally trained by Korsholm22. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dk_emotion_bert_class_en_5.1.4_3.4_1698222898858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dk_emotion_bert_class_en_5.1.4_3.4_1698222898858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("dk_emotion_bert_class","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("dk_emotion_bert_class","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dk_emotion_bert_class| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|414.6 MB| + +## References + +https://huggingface.co/Korsholm22/dk_emotion_bert_class \ No newline at end of file From 71f5f848833df4ab15676b89f1fc9f5cb00170fd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:37:26 +0700 Subject: [PATCH 063/301] Add model 2023-10-25-goog_bert_ft_cola_47_en --- .../2023-10-25-goog_bert_ft_cola_47_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_47_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_47_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_47_en.md new file mode 100644 index 00000000000000..abe8336f4ea9a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_47_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_47 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_47 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_47` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_47_en_5.1.4_3.4_1698222867221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_47_en_5.1.4_3.4_1698222867221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_47","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_47","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_47| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-47 \ No newline at end of file From d7e5ac564b7a9a38d445ec5e6900fb3956cd8cb0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:38:26 +0700 Subject: [PATCH 064/301] Add model 2023-10-25-bert_tiny_finetuned_sst2_en --- .../2023-10-25-bert_tiny_finetuned_sst2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_sst2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_sst2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_sst2_en.md new file mode 100644 index 00000000000000..119e41dac1f850 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_sst2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_tiny_finetuned_sst2 BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_tiny_finetuned_sst2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_sst2` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_sst2_en_5.1.4_3.4_1698223010239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_sst2_en_5.1.4_3.4_1698223010239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_sst2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_sst2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_sst2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/M-FAC/bert-tiny-finetuned-sst2 \ No newline at end of file From 271e4adf03c86790a136dac2b3c68a0445284859 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:39:26 +0700 Subject: [PATCH 065/301] Add model 2023-10-25-dk_emotion_bert_2_en --- .../2023-10-25-dk_emotion_bert_2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_2_en.md b/docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_2_en.md new file mode 100644 index 00000000000000..4953bc34c7f514 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-dk_emotion_bert_2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English dk_emotion_bert_2 BertForSequenceClassification from Only-Mike +author: John Snow Labs +name: dk_emotion_bert_2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dk_emotion_bert_2` is an English model originally trained by Only-Mike.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dk_emotion_bert_2_en_5.1.4_3.4_1698223071802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dk_emotion_bert_2_en_5.1.4_3.4_1698223071802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("dk_emotion_bert_2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("dk_emotion_bert_2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dk_emotion_bert_2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|414.6 MB| + +## References + +https://huggingface.co/Only-Mike/dk_emotion_bert_2 \ No newline at end of file From 50511ee0934763c5912490ce3606d0f870eee3eb Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:40:26 +0700 Subject: [PATCH 066/301] Add model 2023-10-25-goog_bert_ft_cola_71_en --- .../2023-10-25-goog_bert_ft_cola_71_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_71_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_71_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_71_en.md new file mode 100644 index 00000000000000..9a6ee5a84ab843 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_71_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_71 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_71 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_71` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_71_en_5.1.4_3.4_1698223045460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_71_en_5.1.4_3.4_1698223045460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_71","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_71","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_71| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-71 \ No newline at end of file From 9f34824a93abfce69fa68f9771c010e8d0e62b20 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:41:27 +0700 Subject: [PATCH 067/301] Add model 2023-10-25-bert_base_uncased_finetuned_iemocap_uptuna2_en --- ...se_uncased_finetuned_iemocap_uptuna2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap_uptuna2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap_uptuna2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap_uptuna2_en.md new file mode 100644 index 00000000000000..c3f77f2231298a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_iemocap_uptuna2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_iemocap_uptuna2 BertForSequenceClassification from Zahra99 +author: John Snow Labs +name: bert_base_uncased_finetuned_iemocap_uptuna2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_iemocap_uptuna2` is an English model originally trained by Zahra99.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_iemocap_uptuna2_en_5.1.4_3.4_1698213797050.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_iemocap_uptuna2_en_5.1.4_3.4_1698213797050.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_iemocap_uptuna2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_iemocap_uptuna2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_iemocap_uptuna2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Zahra99/bert-base-uncased-finetuned-iemocap-uptuna2 \ No newline at end of file From 96a809d8c1cde417a0bf9711e4260188d0145333 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:42:27 +0700 Subject: [PATCH 068/301] Add model 2023-10-25-goog_bert_ft_cola_73_en --- .../2023-10-25-goog_bert_ft_cola_73_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_73_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_73_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_73_en.md new file mode 100644 index 00000000000000..d3a2d9ac149662 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_73_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_73 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_73 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_73` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_73_en_5.1.4_3.4_1698223219896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_73_en_5.1.4_3.4_1698223219896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_73","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_73","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_73| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-73 \ No newline at end of file From 90b00e9d7c87a26afa6caad2440e48c8f253a782 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:43:27 +0700 Subject: [PATCH 069/301] Add model 2023-10-25-bert_classification_5ksamples_en --- ...-10-25-bert_classification_5ksamples_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classification_5ksamples_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classification_5ksamples_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classification_5ksamples_en.md new file mode 100644 index 00000000000000..bc784121a2d604 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classification_5ksamples_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_classification_5ksamples BertForSequenceClassification from jayavibhav +author: John Snow Labs +name: bert_classification_5ksamples +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_classification_5ksamples` is an English model originally trained by jayavibhav.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classification_5ksamples_en_5.1.4_3.4_1698223323561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classification_5ksamples_en_5.1.4_3.4_1698223323561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_classification_5ksamples","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_classification_5ksamples","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classification_5ksamples| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/jayavibhav/bert-classification-5ksamples \ No newline at end of file From e831d38ef669f47debde73b5dbb9b92c65f58433 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:44:27 +0700 Subject: [PATCH 070/301] Add model 2023-10-25-bert_classifier_ara_multiclass_news_ar --- ...-bert_classifier_ara_multiclass_news_ar.md | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_ara_multiclass_news_ar.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_ara_multiclass_news_ar.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_ara_multiclass_news_ar.md new file mode 100644 index 00000000000000..902ac68b082fa5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_ara_multiclass_news_ar.md @@ -0,0 +1,100 @@ +--- +layout: model +title: Arabic BertForSequenceClassification Cased model (from M47Labs) +author: John Snow Labs +name: bert_classifier_ara_multiclass_news +date: 2023-10-25 +tags: [bert, sequence_classification, classification, open_source, ar, onnx] +task: Text Classification +language: ar +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert_multiclass_news` is an Arabic model originally trained by `M47Labs`.
+ +## Predicted Entities + +`sports`, `politics`, `culture`, `tech`, `religion`, `medical`, `finance` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_ara_multiclass_news_ar_5.1.4_3.4_1698223354357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_ara_multiclass_news_ar_5.1.4_3.4_1698223354357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_ara_multiclass_news","ar") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer,sequenceClassifier_loaded]) + +data = spark.createDataFrame([["أنا أحب الشرارة NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_ara_multiclass_news","ar") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer,sequenceClassifier_loaded)) + +val data = Seq("أنا أحب الشرارة NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_ara_multiclass_news| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|ar| +|Size:|414.2 MB| +|Case sensitive:|true| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/M47Labs/arabert_multiclass_news \ No newline at end of file From 9430d28639ed5658b9159694744833f248574e27 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:45:27 +0700 Subject: [PATCH 071/301] Add model 2023-10-25-tonely_bert_en --- .../ahmedlone127/2023-10-25-tonely_bert_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-tonely_bert_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-tonely_bert_en.md b/docs/_posts/ahmedlone127/2023-10-25-tonely_bert_en.md new file mode 100644 index 00000000000000..d28bb22dad1362 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-tonely_bert_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English tonely_bert BertForSequenceClassification from francheska-vicente +author: John Snow Labs +name: tonely_bert +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tonely_bert` is an English model originally trained by francheska-vicente.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tonely_bert_en_5.1.4_3.4_1698223349625.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tonely_bert_en_5.1.4_3.4_1698223349625.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("tonely_bert","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("tonely_bert","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tonely_bert| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/francheska-vicente/tonely-bert \ No newline at end of file From 053694a4e528961c665856c62e2a1a10d9bafe31 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:46:28 +0700 Subject: [PATCH 072/301] Add model 2023-10-25-bert_base_uncased_sst_2_64_13_30_en --- ...-25-bert_base_uncased_sst_2_64_13_30_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_64_13_30_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_64_13_30_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_64_13_30_en.md new file mode 100644 index 00000000000000..da87b695bc24bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_64_13_30_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_sst_2_64_13_30 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_base_uncased_sst_2_64_13_30 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sst_2_64_13_30` is an English model originally trained by simonycl.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_64_13_30_en_5.1.4_3.4_1698222554412.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_64_13_30_en_5.1.4_3.4_1698222554412.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_64_13_30","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_64_13_30","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sst_2_64_13_30| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/simonycl/bert-base-uncased-sst-2-64-13-30 \ No newline at end of file From 53700b1577bc7ecb3e621921e4748f299c91cfc3 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:47:28 +0700 Subject: [PATCH 073/301] Add model 2023-10-25-bert_ft_cola_81_en --- .../2023-10-25-bert_ft_cola_81_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_81_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_81_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_81_en.md new file mode 100644 index 00000000000000..8dca927e9d1e3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_81_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_81 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_81 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_81` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_81_en_5.1.4_3.4_1698223539173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_81_en_5.1.4_3.4_1698223539173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_81","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_81","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_81| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-81 \ No newline at end of file From a1b4f234b353841c117aa64666f1ea6508355178 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:48:28 +0700 Subject: [PATCH 074/301] Add model 2023-10-25-bert_classification_10ksamples_en --- ...10-25-bert_classification_10ksamples_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classification_10ksamples_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classification_10ksamples_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classification_10ksamples_en.md new file mode 100644 index 00000000000000..d3230ed49ce898 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classification_10ksamples_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_classification_10ksamples BertForSequenceClassification from jayavibhav +author: John Snow Labs +name: bert_classification_10ksamples +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_classification_10ksamples` is a English model originally trained by jayavibhav. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classification_10ksamples_en_5.1.4_3.4_1698223539762.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classification_10ksamples_en_5.1.4_3.4_1698223539762.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_classification_10ksamples","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_classification_10ksamples","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classification_10ksamples| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/jayavibhav/bert-classification-10ksamples \ No newline at end of file From 742a4826494ac251ceb16cf591d07b8e782095da Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:49:28 +0700 Subject: [PATCH 075/301] Add model 2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001_en --- ...indobert_large_p2_muhammadravi251001_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001_en.md new file mode 100644 index 00000000000000..94bba1b2d3c5b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001 BertForSequenceClassification from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, 
adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001` is an English model originally trained by muhammadravi251001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001_en_5.1.4_3.4_1698223677212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001_en_5.1.4_3.4_1698223677212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_augmented_with_indobert_large_p2_muhammadravi251001| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-IndoNLI-Augmented-with-indobert-large-p2 \ No newline at end of file From 6e6c29911d0ea13cdff26916b5a78f09272208e3 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:50:28 +0700 Subject: [PATCH 076/301] Add model 2023-10-25-goog_bert_ft_cola_72_en --- .../2023-10-25-goog_bert_ft_cola_72_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_72_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_72_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_72_en.md new file mode 100644 index 00000000000000..8e8a4e73a26404 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_72_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_72 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_72 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_72` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_72_en_5.1.4_3.4_1698223744727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_72_en_5.1.4_3.4_1698223744727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_72","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_72","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_72| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-72 \ No newline at end of file From 372c1c9cc5ff4f9d28f8558b50b651381f48ada6 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:51:29 +0700 Subject: [PATCH 077/301] Add model 2023-10-25-bert_ft_cola_82_en --- .../2023-10-25-bert_ft_cola_82_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_82_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_82_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_82_en.md new file mode 100644 index 00000000000000..5eef4285deaf00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_82_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_82 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_82 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_cola_82` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_82_en_5.1.4_3.4_1698223738328.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_82_en_5.1.4_3.4_1698223738328.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_82","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_82","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_82| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-82 \ No newline at end of file From ff4b1f4518263f727229f03164db4790acdbe90f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:52:29 +0700 Subject: [PATCH 078/301] Add model 2023-10-25-bert_base_uncased_ag_news_finetuned_dwnews_categories_en --- ..._ag_news_finetuned_dwnews_categories_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ag_news_finetuned_dwnews_categories_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ag_news_finetuned_dwnews_categories_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ag_news_finetuned_dwnews_categories_en.md new file mode 100644 index 00000000000000..fdfa063057616a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_ag_news_finetuned_dwnews_categories_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_ag_news_finetuned_dwnews_categories BertForSequenceClassification from tillschwoerer +author: John Snow Labs +name: bert_base_uncased_ag_news_finetuned_dwnews_categories +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_ag_news_finetuned_dwnews_categories` is 
an English model originally trained by tillschwoerer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ag_news_finetuned_dwnews_categories_en_5.1.4_3.4_1698217799442.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ag_news_finetuned_dwnews_categories_en_5.1.4_3.4_1698217799442.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_ag_news_finetuned_dwnews_categories","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_ag_news_finetuned_dwnews_categories","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ag_news_finetuned_dwnews_categories| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/tillschwoerer/bert-base-uncased-ag-news-finetuned-dwnews-categories \ No newline at end of file From cbf9dd0a2105926287f1b64049c3a1dbe45d3a68 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:53:29 +0700 Subject: [PATCH 079/301] Add model 2023-10-25-rubert_large_emotion_russian_cedr_m7_en --- ...rubert_large_emotion_russian_cedr_m7_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-rubert_large_emotion_russian_cedr_m7_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-rubert_large_emotion_russian_cedr_m7_en.md b/docs/_posts/ahmedlone127/2023-10-25-rubert_large_emotion_russian_cedr_m7_en.md new file mode 100644 index 00000000000000..f9b4f7acf93a44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-rubert_large_emotion_russian_cedr_m7_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English rubert_large_emotion_russian_cedr_m7 BertForSequenceClassification from Aniemore +author: John Snow Labs +name: rubert_large_emotion_russian_cedr_m7 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rubert_large_emotion_russian_cedr_m7` is a English model originally trained by Aniemore. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_large_emotion_russian_cedr_m7_en_5.1.4_3.4_1698208388605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_large_emotion_russian_cedr_m7_en_5.1.4_3.4_1698208388605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("rubert_large_emotion_russian_cedr_m7","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("rubert_large_emotion_russian_cedr_m7","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_large_emotion_russian_cedr_m7| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/Aniemore/rubert-large-emotion-russian-cedr-m7 \ No newline at end of file From 9150e26803b87a7d39bdc6cc3699891ee4178b23 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:54:29 +0700 Subject: [PATCH 080/301] Add model 2023-10-25-mengzi_bert_base_fin_ssec_en --- ...2023-10-25-mengzi_bert_base_fin_ssec_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_ssec_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_ssec_en.md b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_ssec_en.md new file mode 100644 index 00000000000000..9ad40abc891aba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_ssec_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English mengzi_bert_base_fin_ssec BertForSequenceClassification from hw2942 +author: John Snow Labs +name: mengzi_bert_base_fin_ssec +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mengzi_bert_base_fin_ssec` is a English model originally trained by hw2942. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_ssec_en_5.1.4_3.4_1698222925679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_ssec_en_5.1.4_3.4_1698222925679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_ssec","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_ssec","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mengzi_bert_base_fin_ssec| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.1 MB| + +## References + +https://huggingface.co/hw2942/mengzi-bert-base-fin-SSEC \ No newline at end of file From e17d871c928f5b0d40287896dca167d19552be63 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:55:30 +0700 Subject: [PATCH 081/301] Add model 2023-10-25-ogbv_gender_bert_hindi_english_hasoc20a_fin_en --- ...nder_bert_hindi_english_hasoc20a_fin_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-ogbv_gender_bert_hindi_english_hasoc20a_fin_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-ogbv_gender_bert_hindi_english_hasoc20a_fin_en.md b/docs/_posts/ahmedlone127/2023-10-25-ogbv_gender_bert_hindi_english_hasoc20a_fin_en.md new file mode 100644 index 00000000000000..0cadc0726cfaa2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-ogbv_gender_bert_hindi_english_hasoc20a_fin_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English ogbv_gender_bert_hindi_english_hasoc20a_fin BertForSequenceClassification from Maha +author: John Snow Labs +name: ogbv_gender_bert_hindi_english_hasoc20a_fin +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ogbv_gender_bert_hindi_english_hasoc20a_fin` is a English model originally trained by Maha. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ogbv_gender_bert_hindi_english_hasoc20a_fin_en_5.1.4_3.4_1698224066734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ogbv_gender_bert_hindi_english_hasoc20a_fin_en_5.1.4_3.4_1698224066734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("ogbv_gender_bert_hindi_english_hasoc20a_fin","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("ogbv_gender_bert_hindi_english_hasoc20a_fin","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ogbv_gender_bert_hindi_english_hasoc20a_fin| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|667.3 MB| + +## References + +https://huggingface.co/Maha/OGBV-gender-bert-hi-en-hasoc20a-fin \ No newline at end of file From 0b945d111fa3fd0006c44ed2b732837facfe7346 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:56:30 +0700 Subject: [PATCH 082/301] Add model 2023-10-25-incivility_v2_final_tulio_chilean_spanish_bert_en --- ..._v2_final_tulio_chilean_spanish_bert_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-incivility_v2_final_tulio_chilean_spanish_bert_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-incivility_v2_final_tulio_chilean_spanish_bert_en.md b/docs/_posts/ahmedlone127/2023-10-25-incivility_v2_final_tulio_chilean_spanish_bert_en.md new file mode 100644 index 00000000000000..b2d9d776af8270 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-incivility_v2_final_tulio_chilean_spanish_bert_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English incivility_v2_final_tulio_chilean_spanish_bert BertForSequenceClassification from jorgeortizfuentes +author: John Snow Labs +name: incivility_v2_final_tulio_chilean_spanish_bert +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`incivility_v2_final_tulio_chilean_spanish_bert` is a 
English model originally trained by jorgeortizfuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incivility_v2_final_tulio_chilean_spanish_bert_en_5.1.4_3.4_1698209746645.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incivility_v2_final_tulio_chilean_spanish_bert_en_5.1.4_3.4_1698209746645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("incivility_v2_final_tulio_chilean_spanish_bert","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("incivility_v2_final_tulio_chilean_spanish_bert","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incivility_v2_final_tulio_chilean_spanish_bert| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.5 MB| + +## References + +https://huggingface.co/jorgeortizfuentes/incivility_v2_final-tulio-chilean-spanish-bert \ No newline at end of file From d085ffb058be9c346a1d6543b0e3dde0cb1e5f23 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:57:30 +0700 Subject: [PATCH 083/301] Add model 2023-10-25-bert_ft_cola_84_en --- .../2023-10-25-bert_ft_cola_84_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_84_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_84_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_84_en.md new file mode 100644 index 00000000000000..bba56c7c70c7c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_84_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_84 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_84 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_84` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_84_en_5.1.4_3.4_1698224151148.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_84_en_5.1.4_3.4_1698224151148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_84","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_84","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_84| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-84 \ No newline at end of file From 191ee13973cb5af3acb42c53e524bbe92dacdaa0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:58:30 +0700 Subject: [PATCH 084/301] Add model 2023-10-25-goog_bert_ft_cola_63_en --- .../2023-10-25-goog_bert_ft_cola_63_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_63_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_63_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_63_en.md new file mode 100644 index 00000000000000..37e917d2144fad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_63_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_63 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_63 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_63` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_63_en_5.1.4_3.4_1698224150303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_63_en_5.1.4_3.4_1698224150303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_63","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_63","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_63| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-63 \ No newline at end of file From 00d19982de901da63bcc86288268c4b40ba01e8f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 15:59:30 +0700 Subject: [PATCH 085/301] Add model 2023-10-25-goog_bert_ft_cola_44_en --- .../2023-10-25-goog_bert_ft_cola_44_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_44_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_44_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_44_en.md new file mode 100644 index 00000000000000..86685d30bbac38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_44_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_44 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_44 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_44` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_44_en_5.1.4_3.4_1698222126299.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_44_en_5.1.4_3.4_1698222126299.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_44","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_44","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_44| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-44 \ No newline at end of file From 158c007fbfb9ecc6a3d50ea67942ef546a0379bd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:00:30 +0700 Subject: [PATCH 086/301] Add model 2023-10-25-goog_bert_ft_cola_14_en --- .../2023-10-25-goog_bert_ft_cola_14_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_14_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_14_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_14_en.md new file mode 100644 index 00000000000000..926c43f8080c6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_14_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_14 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_14 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_14` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_14_en_5.1.4_3.4_1698216461612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_14_en_5.1.4_3.4_1698216461612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_14","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_14","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_14| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-14 \ No newline at end of file From aba88011b5859ebe6fcd22527844d4b96e54599a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:01:31 +0700 Subject: [PATCH 087/301] Add model 2023-10-25-hindi_const21_hibert_final_en --- ...023-10-25-hindi_const21_hibert_final_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-hindi_const21_hibert_final_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-hindi_const21_hibert_final_en.md b/docs/_posts/ahmedlone127/2023-10-25-hindi_const21_hibert_final_en.md new file mode 100644 index 00000000000000..67a0c24f3195c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-hindi_const21_hibert_final_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English hindi_const21_hibert_final BertForSequenceClassification from Maha +author: John Snow Labs +name: hindi_const21_hibert_final +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_const21_hibert_final` is an English model originally trained by Maha.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_const21_hibert_final_en_5.1.4_3.4_1698224332652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_const21_hibert_final_en_5.1.4_3.4_1698224332652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("hindi_const21_hibert_final","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("hindi_const21_hibert_final","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_const21_hibert_final| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|611.4 MB| + +## References + +https://huggingface.co/Maha/hi-const21-hibert_final \ No newline at end of file From babfc32f678afd00142309707cc4c377f3adc339 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:02:31 +0700 Subject: [PATCH 088/301] Add model 2023-10-25-goog_bert_ft_cola_68_en --- .../2023-10-25-goog_bert_ft_cola_68_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_68_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_68_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_68_en.md new file mode 100644 index 00000000000000..a6dddfecc8d91c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_68_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_68 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_68 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_68` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_68_en_5.1.4_3.4_1698223587499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_68_en_5.1.4_3.4_1698223587499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_68","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_68","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_68| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-68 \ No newline at end of file From e5efe72244531ad79076fe8753e6617a1e6a0c53 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:03:32 +0700 Subject: [PATCH 089/301] Add model 2023-10-25-goog_bert_ft_cola_69_en --- .../2023-10-25-goog_bert_ft_cola_69_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_69_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_69_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_69_en.md new file mode 100644 index 00000000000000..124ef297147464 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_69_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_69 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_69 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_69` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_69_en_5.1.4_3.4_1698224583571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_69_en_5.1.4_3.4_1698224583571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_69","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_69","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_69| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-69 \ No newline at end of file From 21de2057a3b0161e12f3e2c611bda8490b53f617 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:04:32 +0700 Subject: [PATCH 090/301] Add model 2023-10-25-goog_bert_ft_cola_43_en --- .../2023-10-25-goog_bert_ft_cola_43_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_43_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_43_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_43_en.md new file mode 100644 index 00000000000000..98b58fc5562163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_43_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_43 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_43 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_43` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_43_en_5.1.4_3.4_1698221771690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_43_en_5.1.4_3.4_1698221771690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_43","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_43","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_43| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-43 \ No newline at end of file From 458c2871c6518193839d4663c874bfb4a3c1bec7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:05:32 +0700 Subject: [PATCH 091/301] Add model 2023-10-25-bert_large_uncased_sst_2_16_13_30_en --- ...25-bert_large_uncased_sst_2_16_13_30_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_30_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_30_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_30_en.md new file mode 100644 index 00000000000000..0f548ac7d8a7ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_30_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_large_uncased_sst_2_16_13_30 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_large_uncased_sst_2_16_13_30 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_sst_2_16_13_30` is an English model originally trained by simonycl.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_16_13_30_en_5.1.4_3.4_1698224647052.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_16_13_30_en_5.1.4_3.4_1698224647052.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_16_13_30","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_16_13_30","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_sst_2_16_13_30| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/simonycl/bert-large-uncased-sst-2-16-13-30 \ No newline at end of file From 8d35adb93b7ff4e24c4d7b3d43abdccd889b10a1 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:06:33 +0700 Subject: [PATCH 092/301] Add model 2023-10-25-bert_classifier_bert_base_german_cased_gnad10_de --- ...sifier_bert_base_german_cased_gnad10_de.md | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_bert_base_german_cased_gnad10_de.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_bert_base_german_cased_gnad10_de.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_bert_base_german_cased_gnad10_de.md new file mode 100644 index 00000000000000..85d5d53ee2be45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_bert_base_german_cased_gnad10_de.md @@ -0,0 +1,106 @@ +--- +layout: model +title: German BertForSequenceClassification Base Cased model (from Mathking) +author: John Snow Labs +name: bert_classifier_bert_base_german_cased_gnad10 +date: 2023-10-25 +tags: [de, open_source, bert, sequence_classification, classification, onnx] +task: Text Classification +language: de +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-base-german-cased-gnad10` is a German model originally trained by `Mathking`. 
+ +## Predicted Entities + +`Wirtschaft`, `Panorama`, `Web`, `Inland`, `Etat`, `Wissenschaft`, `International`, `Sport`, `Kultur` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_bert_base_german_cased_gnad10_de_5.1.4_3.4_1698224767090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_bert_base_german_cased_gnad10_de_5.1.4_3.4_1698224767090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +seq_classifier = BertForSequenceClassification.pretrained("bert_classifier_bert_base_german_cased_gnad10","de") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, seq_classifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val seq_classifier = BertForSequenceClassification.pretrained("bert_classifier_bert_base_german_cased_gnad10","de") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, seq_classifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("de.classify.bert.cased_base.by_mathking").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_bert_base_german_cased_gnad10| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|de| +|Size:|409.1 MB| +|Case sensitive:|true| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/Mathking/bert-base-german-cased-gnad10 \ No newline at end of file From 57da0ccf8121cab6d9cce92b19e16a24db0c11b1 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:07:32 +0700 Subject: [PATCH 093/301] Add model 2023-10-25-bert_finetuned_mrpc_tiansiyuan_en --- ...10-25-bert_finetuned_mrpc_tiansiyuan_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_mrpc_tiansiyuan_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_mrpc_tiansiyuan_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_mrpc_tiansiyuan_en.md new file mode 100644 index 00000000000000..2fd92f028df865 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_mrpc_tiansiyuan_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_finetuned_mrpc_tiansiyuan BertForSequenceClassification from tiansiyuan +author: John Snow Labs +name: bert_finetuned_mrpc_tiansiyuan +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_mrpc_tiansiyuan` is an English model originally trained by tiansiyuan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_mrpc_tiansiyuan_en_5.1.4_3.4_1698223815854.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_mrpc_tiansiyuan_en_5.1.4_3.4_1698223815854.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuned_mrpc_tiansiyuan","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuned_mrpc_tiansiyuan","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_mrpc_tiansiyuan| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/tiansiyuan/bert-finetuned-mrpc \ No newline at end of file From 339063e5e838659b710f4e15f343d088922cec88 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:08:33 +0700 Subject: [PATCH 094/301] Add model 2023-10-25-bert_ft_cola_87_en --- .../2023-10-25-bert_ft_cola_87_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_87_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_87_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_87_en.md new file mode 100644 index 00000000000000..0892b6a6eb38ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_87_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_87 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_87 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_87` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_87_en_5.1.4_3.4_1698224817220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_87_en_5.1.4_3.4_1698224817220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_87","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_87","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_87| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-87 \ No newline at end of file From 403d45f83411bf6397b36c9c4f03acb4b854e398 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:09:33 +0700 Subject: [PATCH 095/301] Add model 2023-10-25-goog_bert_ft_cola_52_en --- .../2023-10-25-goog_bert_ft_cola_52_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_52_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_52_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_52_en.md new file mode 100644 index 00000000000000..0213934506a26a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_52_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_52 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_52 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_52` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_52_en_5.1.4_3.4_1698224786799.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_52_en_5.1.4_3.4_1698224786799.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_52","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_52","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_52| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-52 \ No newline at end of file From f12ccec04b3ab0d1c7e7b1a75946fb3d27262bee Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:10:33 +0700 Subject: [PATCH 096/301] Add model 2023-10-25-bert_tiny_finetuned_qqp_en --- .../2023-10-25-bert_tiny_finetuned_qqp_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qqp_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qqp_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qqp_en.md new file mode 100644 index 00000000000000..ffe2e048cb4561 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_qqp_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_tiny_finetuned_qqp BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_tiny_finetuned_qqp +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_qqp` is an English model originally trained by M-FAC.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_qqp_en_5.1.4_3.4_1698222915759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_qqp_en_5.1.4_3.4_1698222915759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_qqp","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_qqp","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_qqp| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/M-FAC/bert-tiny-finetuned-qqp \ No newline at end of file From f39bb4515f0363302ae051b30c2ea1ed13a82f0e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:11:33 +0700 Subject: [PATCH 097/301] Add model 2023-10-25-goog_bert_ft_cola_53_en --- .../2023-10-25-goog_bert_ft_cola_53_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_53_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_53_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_53_en.md new file mode 100644 index 00000000000000..71f33047ff8ca0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_53_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_53 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_53 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_53` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_53_en_5.1.4_3.4_1698225000509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_53_en_5.1.4_3.4_1698225000509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_53","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_53","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_53| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-53 \ No newline at end of file From ce3b725a22a268598695e226b3cbbe59d1c2b140 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:12:33 +0700 Subject: [PATCH 098/301] Add model 2023-10-25-bert_base_uncased_sst_2_16_100_en --- ...10-25-bert_base_uncased_sst_2_16_100_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_100_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_100_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_100_en.md new file mode 100644 index 00000000000000..5b7ae90489115b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_100_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_sst_2_16_100 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_base_uncased_sst_2_16_100 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sst_2_16_100` is an English model originally trained by simonycl.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_16_100_en_5.1.4_3.4_1698211268902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_16_100_en_5.1.4_3.4_1698211268902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_16_100","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_16_100","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sst_2_16_100| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/simonycl/bert-base-uncased-sst-2-16-100 \ No newline at end of file From 5e85db74ca2f0ee18b33615542ffd2b5cb1ef0d0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:13:34 +0700 Subject: [PATCH 099/301] Add model 2023-10-25-bert_ft_cola_88_en --- .../2023-10-25-bert_ft_cola_88_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_88_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_88_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_88_en.md new file mode 100644 index 00000000000000..273bb864799e6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_88_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_88 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_88 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_88` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_88_en_5.1.4_3.4_1698225043797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_88_en_5.1.4_3.4_1698225043797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_88","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_88","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_88| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-88 \ No newline at end of file From 601196c25bd5421992c8491c6eace47b7e360199 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:14:34 +0700 Subject: [PATCH 100/301] Add model 2023-10-25-goog_bert_ft_cola_39_en --- .../2023-10-25-goog_bert_ft_cola_39_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_39_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_39_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_39_en.md new file mode 100644 index 00000000000000..76d2b0e394ff87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_39_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_39 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_39 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_39` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_39_en_5.1.4_3.4_1698221378949.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_39_en_5.1.4_3.4_1698221378949.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_39","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_39","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_39| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-39 \ No newline at end of file From f6c847916c4387ec66bfbe93a1ea11509ef3854e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:15:34 +0700 Subject: [PATCH 101/301] Add model 2023-10-25-goog_bert_ft_cola_60_en --- .../2023-10-25-goog_bert_ft_cola_60_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_60_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_60_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_60_en.md new file mode 100644 index 00000000000000..8cbdb3d1be1f56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_60_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_60 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_60 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_60` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_60_en_5.1.4_3.4_1698224363748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_60_en_5.1.4_3.4_1698224363748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_60","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_60","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_60| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-60 \ No newline at end of file From 83fc3c9848c1c8d821d48b0501758b520b6995cd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:16:34 +0700 Subject: [PATCH 102/301] Add model 2023-10-25-bert_mixed_en --- .../ahmedlone127/2023-10-25-bert_mixed_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_mixed_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_mixed_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_mixed_en.md new file mode 100644 index 00000000000000..49aa9d877edc14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_mixed_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_mixed BertForSequenceClassification from PravallikaMyneni +author: John Snow Labs +name: bert_mixed +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mixed` is an English model originally trained by PravallikaMyneni. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mixed_en_5.1.4_3.4_1698222545171.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mixed_en_5.1.4_3.4_1698222545171.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_mixed","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_mixed","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mixed| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/PravallikaMyneni/bert_mixed \ No newline at end of file From 6ace6895e0e52051d80d0636d5b83d3775701293 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:17:35 +0700 Subject: [PATCH 103/301] Add model 2023-10-25-bert_ft_qqp_83_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_83_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_83_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_83_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_83_jeevesh8_en.md new file mode 100644 index 00000000000000..d199f7ff7379dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_83_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_83_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_83_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_83_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_83_jeevesh8_en_5.1.4_3.4_1698219977145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_83_jeevesh8_en_5.1.4_3.4_1698219977145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_83_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_83_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_83_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-83 \ No newline at end of file From d2a87cc772cded11869dc51d10800d50bbcfef22 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:18:35 +0700 Subject: [PATCH 104/301] Add model 2023-10-25-bert_base_uncased_finetuned_question_v_statement_en --- ...cased_finetuned_question_v_statement_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_question_v_statement_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_question_v_statement_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_question_v_statement_en.md new file mode 100644 index 00000000000000..a64c63361d443f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_question_v_statement_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_question_v_statement BertForSequenceClassification from mafwalter +author: John Snow Labs +name: bert_base_uncased_finetuned_question_v_statement +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_question_v_statement` is an English model originally trained 
by mafwalter. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_question_v_statement_en_5.1.4_3.4_1698222726543.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_question_v_statement_en_5.1.4_3.4_1698222726543.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_question_v_statement","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_question_v_statement","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_question_v_statement| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/mafwalter/bert-base-uncased-finetuned-question-v-statement \ No newline at end of file From 0cb3c6fb5fc5e2681ddad319a5f7a0dbe6f155d6 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:19:35 +0700 Subject: [PATCH 105/301] Add model 2023-10-25-bert_finetuning_test_milian_en --- ...23-10-25-bert_finetuning_test_milian_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_milian_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_milian_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_milian_en.md new file mode 100644 index 00000000000000..8b245b1435a894 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_milian_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_finetuning_test_milian BertForSequenceClassification from Milian +author: John Snow Labs +name: bert_finetuning_test_milian +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_milian` is an English model originally trained by Milian. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_milian_en_5.1.4_3.4_1698225530685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_milian_en_5.1.4_3.4_1698225530685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuning_test_milian","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuning_test_milian","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_milian| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Milian/bert_finetuning_test \ No newline at end of file From 479b4abaae9aca01aa7b795ec0653fe2f86d9368 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:20:36 +0700 Subject: [PATCH 106/301] Add model 2023-10-25-baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01_en --- ...ge_cased_epoch3_batch4_lr2e_05_w0_01_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01_en.md b/docs/_posts/ahmedlone127/2023-10-25-baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01_en.md new file mode 100644 index 00000000000000..2f38d8481bb9a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01 BertForSequenceClassification from JerryYanJiang +author: John Snow Labs +name: baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01` is an English model originally trained by JerryYanJiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01_en_5.1.4_3.4_1698217405966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01_en_5.1.4_3.4_1698217405966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|baseline_bert_large_cased_epoch3_batch4_lr2e_05_w0_01| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/JerryYanJiang/baseline_bert-large-cased_epoch3_batch4_lr2e-05_w0.01 \ No newline at end of file From 3e2fddc98b0543de9ef106aafa476df4175f25b7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:21:36 +0700 Subject: [PATCH 107/301] Add model 2023-10-25-goog_bert_ft_cola_36_en --- .../2023-10-25-goog_bert_ft_cola_36_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_36_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_36_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_36_en.md new file mode 100644 index 00000000000000..cfcef8587f4844 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_36_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_36 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_36 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_36` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_36_en_5.1.4_3.4_1698220575602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_36_en_5.1.4_3.4_1698220575602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_36","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_36","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_36| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-36 \ No newline at end of file From e68210e0358fac0e3bd1a7f0e80d0051a4636092 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:22:36 +0700 Subject: [PATCH 108/301] Add model 2023-10-25-goog_bert_ft_cola_48_en --- .../2023-10-25-goog_bert_ft_cola_48_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_48_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_48_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_48_en.md new file mode 100644 index 00000000000000..abce1cdf1dcc68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_48_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_48 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_48 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_48` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_48_en_5.1.4_3.4_1698225719514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_48_en_5.1.4_3.4_1698225719514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_48","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_48","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_48| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-48 \ No newline at end of file From 3a3b46b482e33b22920fecf7832d3b43585ed802 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:23:36 +0700 Subject: [PATCH 109/301] Add model 2023-10-25-bert_ft_qqp_97_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_97_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_97_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_97_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_97_jeevesh8_en.md new file mode 100644 index 00000000000000..903dcfcb641ca0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_97_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_97_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_97_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_97_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_97_jeevesh8_en_5.1.4_3.4_1698222743039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_97_jeevesh8_en_5.1.4_3.4_1698222743039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_97_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_97_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_97_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-97 \ No newline at end of file From 6ca0ee03aa773cdc9dcea3515881d722396c6310 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:24:37 +0700 Subject: [PATCH 110/301] Add model 2023-10-25-bert_ft_qqp_35_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_35_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_35_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_35_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_35_jeevesh8_en.md new file mode 100644 index 00000000000000..5255680f088f1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_35_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_35_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_35_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_35_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_35_jeevesh8_en_5.1.4_3.4_1698210737122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_35_jeevesh8_en_5.1.4_3.4_1698210737122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_35_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_35_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_35_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-35 \ No newline at end of file From 2b8c001a18d247e7f2f6e50ff8935e97c38c651e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:25:37 +0700 Subject: [PATCH 111/301] Add model 2023-10-25-goog_bert_ft_cola_19_en --- .../2023-10-25-goog_bert_ft_cola_19_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_19_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_19_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_19_en.md new file mode 100644 index 00000000000000..eff358c5fb7547 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_19_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_19 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_19 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_19` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_19_en_5.1.4_3.4_1698218217176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_19_en_5.1.4_3.4_1698218217176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_19","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_19","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_19| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-19 \ No newline at end of file From 30ef3384acf8d4808696aaceaa2c1342fe6181cc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:26:37 +0700 Subject: [PATCH 112/301] Add model 2023-10-25-bert_finetunning_test_jovenpai_en --- ...10-25-bert_finetunning_test_jovenpai_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_jovenpai_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_jovenpai_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_jovenpai_en.md new file mode 100644 index 00000000000000..ef5149b8cd4d3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_finetunning_test_jovenpai_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_finetunning_test_jovenpai BertForSequenceClassification from JovenPai +author: John Snow Labs +name: bert_finetunning_test_jovenpai +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetunning_test_jovenpai` is an English model originally trained by JovenPai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetunning_test_jovenpai_en_5.1.4_3.4_1698220026934.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetunning_test_jovenpai_en_5.1.4_3.4_1698220026934.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetunning_test_jovenpai","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetunning_test_jovenpai","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetunning_test_jovenpai| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/JovenPai/bert_finetunning_test \ No newline at end of file From c876e181d6e6fd67fe78b98d4b2837b3b6a7497c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:27:37 +0700 Subject: [PATCH 113/301] Add model 2023-10-25-bert_ft_cola_89_en --- .../2023-10-25-bert_ft_cola_89_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_89_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_89_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_89_en.md new file mode 100644 index 00000000000000..3d97ca2002d05c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_89_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_89 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_89 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_89` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_89_en_5.1.4_3.4_1698225941627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_89_en_5.1.4_3.4_1698225941627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_89","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_89","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_89| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-89 \ No newline at end of file From f2b3df5f22adb987f100847c84ecf69467ebc701 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:28:37 +0700 Subject: [PATCH 114/301] Add model 2023-10-25-bert_ft_cola_83_en --- .../2023-10-25-bert_ft_cola_83_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_83_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_83_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_83_en.md new file mode 100644 index 00000000000000..a3ca40a8535b14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_83_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_83 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_83 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_83` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_83_en_5.1.4_3.4_1698223981519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_83_en_5.1.4_3.4_1698223981519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_83","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_83","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_83| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-83 \ No newline at end of file From afb0995b0651e3ff46332a86a4eb533d12755189 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:29:37 +0700 Subject: [PATCH 115/301] Add model 2023-10-25-goog_bert_ft_cola_75_en --- .../2023-10-25-goog_bert_ft_cola_75_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_75_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_75_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_75_en.md new file mode 100644 index 00000000000000..c6b7e2cf9c1edd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_75_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_75 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_75 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_75` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_75_en_5.1.4_3.4_1698223965234.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_75_en_5.1.4_3.4_1698223965234.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_75","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_75","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_75| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-75 \ No newline at end of file From 7d059d9f9394d9f634163dc30b177a439f13357c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:30:38 +0700 Subject: [PATCH 116/301] Add model 2023-10-25-bert_sequence_classifier_base_uncased_sst2_en --- ...equence_classifier_base_uncased_sst2_en.md | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_sst2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_sst2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_sst2_en.md new file mode 100644 index 00000000000000..bf377d771b78ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_base_uncased_sst2_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English BertForSequenceClassification Base Uncased model (from JeremiahZ) +author: John Snow Labs +name: bert_sequence_classifier_base_uncased_sst2 +date: 2023-10-25 +tags: [en, open_source, bert, sequence_classification, ner, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-base-uncased-sst2` is an English model originally trained by `JeremiahZ`. 
+ +## Predicted Entities + +`positive`, `negative` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_base_uncased_sst2_en_5.1.4_3.4_1698208956258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_base_uncased_sst2_en_5.1.4_3.4_1698208956258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_base_uncased_sst2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_base_uncased_sst2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_base_uncased_sst2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| +|Case sensitive:|true| +|Max sentence length:|128| + +## References + +References + +- https://huggingface.co/JeremiahZ/bert-base-uncased-sst2 +- https://paperswithcode.com/sota?task=Text+Classification&dataset=GLUE+SST2 \ No newline at end of file From b31b31848a82dd139cbe9bcd9876b67a044d773a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:31:38 +0700 Subject: [PATCH 117/301] Add model 2023-10-25-bert_large_uncased_sst_2_64_13_30_en --- ...25-bert_large_uncased_sst_2_64_13_30_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_64_13_30_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_64_13_30_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_64_13_30_en.md new file mode 100644 index 00000000000000..7647d4ca3a2aab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_64_13_30_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_large_uncased_sst_2_64_13_30 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_large_uncased_sst_2_64_13_30 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `bert_large_uncased_sst_2_64_13_30` is an English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_64_13_30_en_5.1.4_3.4_1698226238586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_64_13_30_en_5.1.4_3.4_1698226238586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_64_13_30","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_64_13_30","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_sst_2_64_13_30| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/simonycl/bert-large-uncased-sst-2-64-13-30 \ No newline at end of file From 6b5b41cbd66a9bd217d4218f81ccb6c74938245d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:32:38 +0700 Subject: [PATCH 118/301] Add model 2023-10-25-tiny_bert_mrpc_distilled_en --- .../2023-10-25-tiny_bert_mrpc_distilled_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-tiny_bert_mrpc_distilled_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-tiny_bert_mrpc_distilled_en.md b/docs/_posts/ahmedlone127/2023-10-25-tiny_bert_mrpc_distilled_en.md new file mode 100644 index 00000000000000..80a136cb63b766 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-tiny_bert_mrpc_distilled_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English tiny_bert_mrpc_distilled BertForSequenceClassification from Sayan01 +author: John Snow Labs +name: tiny_bert_mrpc_distilled +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_bert_mrpc_distilled` is an English model originally trained by Sayan01. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_bert_mrpc_distilled_en_5.1.4_3.4_1698209972204.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_bert_mrpc_distilled_en_5.1.4_3.4_1698209972204.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("tiny_bert_mrpc_distilled","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("tiny_bert_mrpc_distilled","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_bert_mrpc_distilled| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|54.2 MB| + +## References + +https://huggingface.co/Sayan01/tiny-bert-mrpc-distilled \ No newline at end of file From 37ca5a30067f8971ab459ab9232438d312021954 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:33:39 +0700 Subject: [PATCH 119/301] Add model 2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001_en --- ...bert_base_uncased_muhammadravi251001_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001_en.md new file mode 100644 index 00000000000000..ef47c1fe1b46f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001 BertForSequenceClassification from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained 
BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001` is an English model originally trained by muhammadravi251001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001_en_5.1.4_3.4_1698219097750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001_en_5.1.4_3.4_1698219097750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_translated_with_indobert_base_uncased_muhammadravi251001| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|413.9 MB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-IndoNLI-Translated-with-indobert-base-uncased \ No newline at end of file From ebfc66b7cc27e1606529c1eebbc8c4aef05d079a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:34:39 +0700 Subject: [PATCH 120/301] Add model 2023-10-25-bert_ft_qqp_64_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_64_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_64_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_64_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_64_jeevesh8_en.md new file mode 100644 index 00000000000000..41b955f83abbaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_64_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_64_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_64_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_64_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_64_jeevesh8_en_5.1.4_3.4_1698216257062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_64_jeevesh8_en_5.1.4_3.4_1698216257062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_64_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_64_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_64_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-64 \ No newline at end of file From 5c6d4249f86623a7f0eb76004b528aa6a2d73870 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:35:39 +0700 Subject: [PATCH 121/301] Add model 2023-10-25-bert_ft_qqp_31_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_31_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_31_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_31_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_31_jeevesh8_en.md new file mode 100644 index 00000000000000..18fca0704094bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_31_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_31_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_31_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_31_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_31_jeevesh8_en_5.1.4_3.4_1698209991407.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_31_jeevesh8_en_5.1.4_3.4_1698209991407.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_31_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_31_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_31_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-31 \ No newline at end of file From dcdc9a7f9aee759fde64667a82882029c3685022 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:36:39 +0700 Subject: [PATCH 122/301] Add model 2023-10-25-bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan_en --- ..._sixteenth_xnli_finetuned_sarahadnan_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan_en.md new file mode 100644 index 00000000000000..32f1070e740dba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan BertForSequenceClassification from SarahAdnan +author: John Snow Labs +name: bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and 
production-readiness using Spark NLP. `bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan` is an English model originally trained by SarahAdnan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan_en_5.1.4_3.4_1698210534868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan_en_5.1.4_3.4_1698210534868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabic_camelbert_msa_sixteenth_xnli_finetuned_sarahadnan| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|507.3 MB| + +## References + +https://huggingface.co/SarahAdnan/bert-base-arabic-camelbert-msa-sixteenth-xnli-finetuned \ No newline at end of file From 68f211843bec77df396cf1b57ec424cef4b3b0d7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:37:40 +0700 Subject: [PATCH 123/301] Add model 2023-10-25-goog_bert_ft_cola_56_en --- .../2023-10-25-goog_bert_ft_cola_56_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_56_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_56_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_56_en.md new file mode 100644 index 00000000000000..3cca9930f87549 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_56_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_56 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_56 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_56` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_56_en_5.1.4_3.4_1698226620229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_56_en_5.1.4_3.4_1698226620229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_56","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_56","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_56| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-56 \ No newline at end of file From b376af0955a38da4245e254aa9eb841f6fa4041e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:38:40 +0700 Subject: [PATCH 124/301] Add model 2023-10-25-std_0pnt2_bert_ft_cola_16_en --- ...2023-10-25-std_0pnt2_bert_ft_cola_16_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_16_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_16_en.md b/docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_16_en.md new file mode 100644 index 00000000000000..c0542f65b189d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_16_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English std_0pnt2_bert_ft_cola_16 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: std_0pnt2_bert_ft_cola_16 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `std_0pnt2_bert_ft_cola_16` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/std_0pnt2_bert_ft_cola_16_en_5.1.4_3.4_1698202138259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/std_0pnt2_bert_ft_cola_16_en_5.1.4_3.4_1698202138259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("std_0pnt2_bert_ft_cola_16","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("std_0pnt2_bert_ft_cola_16","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|std_0pnt2_bert_ft_cola_16| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/std_0pnt2_bert_ft_cola-16 \ No newline at end of file From 189480f522b78155ee3ff1df391084ebbba251c2 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:39:40 +0700 Subject: [PATCH 125/301] Add model 2023-10-25-bert_mini_finetuned_mnli_en --- .../2023-10-25-bert_mini_finetuned_mnli_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mnli_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mnli_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mnli_en.md new file mode 100644 index 00000000000000..c86eb14f202c19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_mnli_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_mini_finetuned_mnli BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_mini_finetuned_mnli +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_finetuned_mnli` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_mnli_en_5.1.4_3.4_1698222045987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_mnli_en_5.1.4_3.4_1698222045987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_mnli","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_mnli","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_finetuned_mnli| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/M-FAC/bert-mini-finetuned-mnli \ No newline at end of file From cd1932f0ecdd739d5f5e78c815c3373357514435 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:40:40 +0700 Subject: [PATCH 126/301] Add model 2023-10-25-bert_base_uncased_mnli_sparse_70_unstructured_en --- ..._uncased_mnli_sparse_70_unstructured_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_mnli_sparse_70_unstructured_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_mnli_sparse_70_unstructured_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_mnli_sparse_70_unstructured_en.md new file mode 100644 index 00000000000000..57d101ad1b8671 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_mnli_sparse_70_unstructured_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_mnli_sparse_70_unstructured BertForSequenceClassification from Intel +author: John Snow Labs +name: bert_base_uncased_mnli_sparse_70_unstructured +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_mnli_sparse_70_unstructured` is an English model originally trained by Intel. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mnli_sparse_70_unstructured_en_5.1.4_3.4_1698211070392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mnli_sparse_70_unstructured_en_5.1.4_3.4_1698211070392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_mnli_sparse_70_unstructured","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_mnli_sparse_70_unstructured","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_mnli_sparse_70_unstructured| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|228.2 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-mnli-sparse-70-unstructured \ No newline at end of file From f0c2c491c09aca5fe03c0c2a37e448d348aefd97 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:41:41 +0700 Subject: [PATCH 127/301] Add model 2023-10-25-bert_ft_cola_90_en --- .../2023-10-25-bert_ft_cola_90_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_90_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_90_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_90_en.md new file mode 100644 index 00000000000000..0dbfdf0e30c5e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_90_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_90 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_90 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_90` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_90_en_5.1.4_3.4_1698226789555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_90_en_5.1.4_3.4_1698226789555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_90","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_90","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_90| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-90 \ No newline at end of file From 767186d156f37c52f6e3edfed0aa1a0486a2766d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:42:41 +0700 Subject: [PATCH 128/301] Add model 2023-10-25-finetuned_bert_mrpc_ndugar_en --- ...023-10-25-finetuned_bert_mrpc_ndugar_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-finetuned_bert_mrpc_ndugar_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-finetuned_bert_mrpc_ndugar_en.md b/docs/_posts/ahmedlone127/2023-10-25-finetuned_bert_mrpc_ndugar_en.md new file mode 100644 index 00000000000000..92a237b46c2e5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-finetuned_bert_mrpc_ndugar_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English finetuned_bert_mrpc_ndugar BertForSequenceClassification from NDugar +author: John Snow Labs +name: finetuned_bert_mrpc_ndugar +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finetuned_bert_mrpc_ndugar` is an English model originally trained by NDugar. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_bert_mrpc_ndugar_en_5.1.4_3.4_1698226789566.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_bert_mrpc_ndugar_en_5.1.4_3.4_1698226789566.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("finetuned_bert_mrpc_ndugar","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("finetuned_bert_mrpc_ndugar","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_bert_mrpc_ndugar| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/NDugar/finetuned-bert-mrpc \ No newline at end of file From ccf621ca6aad0d20baf4e5765d7ed1152d96a264 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:43:41 +0700 Subject: [PATCH 129/301] Add model 2023-10-25-bert_ft_qqp_92_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_92_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_92_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_92_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_92_jeevesh8_en.md new file mode 100644 index 00000000000000..dc9a643130d58b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_92_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_92_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_92_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_92_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_92_jeevesh8_en_5.1.4_3.4_1698221750750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_92_jeevesh8_en_5.1.4_3.4_1698221750750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_92_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_92_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_92_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-92 \ No newline at end of file From dfc53ba0f76751a4ff4df20c22cc256fc5361d57 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:44:42 +0700 Subject: [PATCH 130/301] Add model 2023-10-25-bert_ft_qqp_88_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_88_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_88_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_88_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_88_jeevesh8_en.md new file mode 100644 index 00000000000000..e8a257a0cec721 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_88_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_88_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_88_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_88_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_88_jeevesh8_en_5.1.4_3.4_1698220946763.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_88_jeevesh8_en_5.1.4_3.4_1698220946763.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_88_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_88_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_88_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-88 \ No newline at end of file From 62570ea7ce1d31b2240f063ca6fed7a2eb80d0ca Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:45:42 +0700 Subject: [PATCH 131/301] Add model 2023-10-25-solved_finbert_tone_en --- .../2023-10-25-solved_finbert_tone_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-solved_finbert_tone_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-solved_finbert_tone_en.md b/docs/_posts/ahmedlone127/2023-10-25-solved_finbert_tone_en.md new file mode 100644 index 00000000000000..7c6d502cfcf9f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-solved_finbert_tone_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English solved_finbert_tone BertForSequenceClassification from ldeb +author: John Snow Labs +name: solved_finbert_tone +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `solved_finbert_tone` is an English model originally trained by ldeb. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/solved_finbert_tone_en_5.1.4_3.4_1698211497295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/solved_finbert_tone_en_5.1.4_3.4_1698211497295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("solved_finbert_tone","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("solved_finbert_tone","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|solved_finbert_tone| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.6 MB| + +## References + +https://huggingface.co/ldeb/solved-finbert-tone \ No newline at end of file From 51575fe376b3942dd9b5ea9803193a665c1c62b5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:46:42 +0700 Subject: [PATCH 132/301] Add model 2023-10-25-deprem_berturk_binary_tr --- .../2023-10-25-deprem_berturk_binary_tr.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-deprem_berturk_binary_tr.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-deprem_berturk_binary_tr.md b/docs/_posts/ahmedlone127/2023-10-25-deprem_berturk_binary_tr.md new file mode 100644 index 00000000000000..93a6c5af8e4e43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-deprem_berturk_binary_tr.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Turkish deprem_berturk_binary BertForSequenceClassification from ctoraman +author: John Snow Labs +name: deprem_berturk_binary +date: 2023-10-25 +tags: [bert, tr, open_source, sequence_classification, onnx] +task: Text Classification +language: tr +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deprem_berturk_binary` is a Turkish model originally trained by ctoraman. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deprem_berturk_binary_tr_5.1.4_3.4_1698218881162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deprem_berturk_binary_tr_5.1.4_3.4_1698218881162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("deprem_berturk_binary","tr")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("deprem_berturk_binary","tr") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deprem_berturk_binary| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|tr| +|Size:|414.5 MB| + +## References + +https://huggingface.co/ctoraman/deprem-berturk-binary \ No newline at end of file From 804817f25f920e3a21d1f0c5275575dc1a25b260 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:47:42 +0700 Subject: [PATCH 133/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2_en --- ...news_market_overview_open_ssec_f1_v2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2_en.md new file mode 100644 index 00000000000000..54e7e89e90dfaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, 
adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2_en_5.1.4_3.4_1698222739007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2_en_5.1.4_3.4_1698222739007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_open_ssec_f1_v2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-open-SSEC-f1-v2 \ No newline at end of file From 4372759fce227df41071d12403305dc8f34071ae Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:48:43 +0700 Subject: [PATCH 134/301] Add model 2023-10-25-bertabaporu_portuguese_irony_en --- ...3-10-25-bertabaporu_portuguese_irony_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bertabaporu_portuguese_irony_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bertabaporu_portuguese_irony_en.md b/docs/_posts/ahmedlone127/2023-10-25-bertabaporu_portuguese_irony_en.md new file mode 100644 index 00000000000000..9f393d39e88ebc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bertabaporu_portuguese_irony_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bertabaporu_portuguese_irony BertForSequenceClassification from pysentimiento +author: John Snow Labs +name: bertabaporu_portuguese_irony +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertabaporu_portuguese_irony` is an English model originally trained by pysentimiento. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertabaporu_portuguese_irony_en_5.1.4_3.4_1698227283769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertabaporu_portuguese_irony_en_5.1.4_3.4_1698227283769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bertabaporu_portuguese_irony","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bertabaporu_portuguese_irony","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertabaporu_portuguese_irony| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|507.0 MB| + +## References + +https://huggingface.co/pysentimiento/bertabaporu-pt-irony \ No newline at end of file From 1e61df9259d6939d34eabcba8e121cb457fb524f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:49:43 +0700 Subject: [PATCH 135/301] Add model 2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood_fa --- ...rsi_base_uncased_sentiment_snappfood_fa.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood_fa.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood_fa.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood_fa.md new file mode 100644 index 00000000000000..85056d89c3ccd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood_fa.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Persian bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood BertForSequenceClassification from HooshvareLab +author: John Snow Labs +name: bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood +date: 2023-10-25 +tags: [bert, fa, open_source, sequence_classification, onnx] +task: Text Classification +language: fa +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from 
Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood_fa_5.1.4_3.4_1698210508316.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood_fa_5.1.4_3.4_1698210508316.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood","fa")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood","fa") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_persian_farsi_base_uncased_sentiment_snappfood| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|fa| +|Size:|608.7 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-fa-base-uncased-sentiment-snappfood \ No newline at end of file From 5b5632e682242068507543b34118e9653b9096ba Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:50:43 +0700 Subject: [PATCH 136/301] Add model 2023-10-25-goog_bert_ft_cola_51_en --- .../2023-10-25-goog_bert_ft_cola_51_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_51_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_51_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_51_en.md new file mode 100644 index 00000000000000..18bc0c70d31135 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_51_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_51 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_51 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_51` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_51_en_5.1.4_3.4_1698227400253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_51_en_5.1.4_3.4_1698227400253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_51","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_51","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_51| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-51 \ No newline at end of file From 5d2eb7a41eb3bba59becc489ba4393dc161dce48 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:51:44 +0700 Subject: [PATCH 137/301] Add model 2023-10-25-bert_base_uncased_sst_2_32_13_en --- ...-10-25-bert_base_uncased_sst_2_32_13_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_en.md new file mode 100644 index 00000000000000..18322a37c1ee8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_32_13_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_sst_2_32_13 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_base_uncased_sst_2_32_13 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sst_2_32_13` is an English model originally trained by simonycl. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_32_13_en_5.1.4_3.4_1698211493421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_32_13_en_5.1.4_3.4_1698211493421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_32_13","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_32_13","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sst_2_32_13| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/simonycl/bert-base-uncased-sst-2-32-13 \ No newline at end of file From f22ebc1c9ab4eb9b4905767fa32f30474d933d02 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:52:44 +0700 Subject: [PATCH 138/301] Add model 2023-10-25-fine_tuned_indonli_translated_with_indobert_large_p2_en --- ...li_translated_with_indobert_large_p2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_large_p2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_large_p2_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_large_p2_en.md new file mode 100644 index 00000000000000..37b51526c73bce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_translated_with_indobert_large_p2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_translated_with_indobert_large_p2 BertForSequenceClassification from afaji +author: John Snow Labs +name: fine_tuned_indonli_translated_with_indobert_large_p2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `fine_tuned_indonli_translated_with_indobert_large_p2` is an English model originally trained by afaji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_translated_with_indobert_large_p2_en_5.1.4_3.4_1698220492900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_translated_with_indobert_large_p2_en_5.1.4_3.4_1698220492900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_translated_with_indobert_large_p2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_translated_with_indobert_large_p2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_translated_with_indobert_large_p2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/afaji/fine-tuned-IndoNLI-Translated-with-indobert-large-p2 \ No newline at end of file From 4b8663a01deed3b7336eb30629cf0d2971c79fcd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:53:44 +0700 Subject: [PATCH 139/301] Add model 2023-10-25-goog_bert_ft_cola_37_en --- .../2023-10-25-goog_bert_ft_cola_37_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_37_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_37_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_37_en.md new file mode 100644 index 00000000000000..02cdb814d6ab38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_37_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_37 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_37 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_37` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_37_en_5.1.4_3.4_1698220419732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_37_en_5.1.4_3.4_1698220419732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_37","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_37","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_37| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-37 \ No newline at end of file From fdfde467ccf28f26f856e58cddc9ff87453f52b5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:54:44 +0700 Subject: [PATCH 140/301] Add model 2023-10-25-std_0pnt2_bert_ft_cola_62_en --- ...2023-10-25-std_0pnt2_bert_ft_cola_62_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_62_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_62_en.md b/docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_62_en.md new file mode 100644 index 00000000000000..2f78fd9098a28c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-std_0pnt2_bert_ft_cola_62_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English std_0pnt2_bert_ft_cola_62 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: std_0pnt2_bert_ft_cola_62 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `std_0pnt2_bert_ft_cola_62` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/std_0pnt2_bert_ft_cola_62_en_5.1.4_3.4_1698204456665.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/std_0pnt2_bert_ft_cola_62_en_5.1.4_3.4_1698204456665.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("std_0pnt2_bert_ft_cola_62","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("std_0pnt2_bert_ft_cola_62","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|std_0pnt2_bert_ft_cola_62| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/std_0pnt2_bert_ft_cola-62 \ No newline at end of file From 76877707687b3e193aa730c4e92a7d841ae155c5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:55:45 +0700 Subject: [PATCH 141/301] Add model 2023-10-25-bert_ft_cola_91_en --- .../2023-10-25-bert_ft_cola_91_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_91_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_91_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_91_en.md new file mode 100644 index 00000000000000..03b02307627fca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_91_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_91 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_91 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_91` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_91_en_5.1.4_3.4_1698227620626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_91_en_5.1.4_3.4_1698227620626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_91","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_91","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_91| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-91 \ No newline at end of file From 00e3eedc5ba6813b2f46e280234326a47984525f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:56:45 +0700 Subject: [PATCH 142/301] Add model 2023-10-25-sentiment_hts2_hubert_hungarian_hu --- ...0-25-sentiment_hts2_hubert_hungarian_hu.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-sentiment_hts2_hubert_hungarian_hu.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-sentiment_hts2_hubert_hungarian_hu.md b/docs/_posts/ahmedlone127/2023-10-25-sentiment_hts2_hubert_hungarian_hu.md new file mode 100644 index 00000000000000..76fb842f567620 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-sentiment_hts2_hubert_hungarian_hu.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Hungarian sentiment_hts2_hubert_hungarian BertForSequenceClassification from NYTK +author: John Snow Labs +name: sentiment_hts2_hubert_hungarian +date: 2023-10-25 +tags: [bert, hu, open_source, sequence_classification, onnx] +task: Text Classification +language: hu +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentiment_hts2_hubert_hungarian` is a Hungarian model originally trained by NYTK.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_hts2_hubert_hungarian_hu_5.1.4_3.4_1698227753323.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_hts2_hubert_hungarian_hu_5.1.4_3.4_1698227753323.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("sentiment_hts2_hubert_hungarian","hu")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("sentiment_hts2_hubert_hungarian","hu") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_hts2_hubert_hungarian| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|hu| +|Size:|414.7 MB| + +## References + +https://huggingface.co/NYTK/sentiment-hts2-hubert-hungarian \ No newline at end of file From 265b2b7f2d75ec10e2c684bdd43e0eb5d892e09b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:57:45 +0700 Subject: [PATCH 143/301] Add model 2023-10-25-bert_ft_qqp_36_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_36_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_36_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_36_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_36_jeevesh8_en.md new file mode 100644 index 00000000000000..fd58f173f8acd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_36_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_36_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_36_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_36_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_36_jeevesh8_en_5.1.4_3.4_1698210931104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_36_jeevesh8_en_5.1.4_3.4_1698210931104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_36_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_36_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_36_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-36 \ No newline at end of file From 2b4361ecc2d9356e7f405d1705733524ee93d0d1 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:58:45 +0700 Subject: [PATCH 144/301] Add model 2023-10-25-bert_wiki_comments_finetuned_en --- ...3-10-25-bert_wiki_comments_finetuned_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_wiki_comments_finetuned_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_wiki_comments_finetuned_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_wiki_comments_finetuned_en.md new file mode 100644 index 00000000000000..255d344248db6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_wiki_comments_finetuned_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_wiki_comments_finetuned BertForSequenceClassification from DoyyingFace +author: John Snow Labs +name: bert_wiki_comments_finetuned +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_wiki_comments_finetuned` is an English model originally trained by DoyyingFace.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_wiki_comments_finetuned_en_5.1.4_3.4_1698201749015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_wiki_comments_finetuned_en_5.1.4_3.4_1698201749015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_wiki_comments_finetuned","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_wiki_comments_finetuned","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_wiki_comments_finetuned| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/DoyyingFace/bert-wiki-comments-finetuned \ No newline at end of file From 9d7a8e6fbe48aa033a4f9cf9da4e4dec9af5d485 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 16:59:45 +0700 Subject: [PATCH 145/301] Add model 2023-10-25-norsk_bert_fintuned_en --- .../2023-10-25-norsk_bert_fintuned_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-norsk_bert_fintuned_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-norsk_bert_fintuned_en.md b/docs/_posts/ahmedlone127/2023-10-25-norsk_bert_fintuned_en.md new file mode 100644 index 00000000000000..840c67aedb048e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-norsk_bert_fintuned_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English norsk_bert_fintuned BertForSequenceClassification from kirisums +author: John Snow Labs +name: norsk_bert_fintuned +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norsk_bert_fintuned` is an English model originally trained by kirisums.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norsk_bert_fintuned_en_5.1.4_3.4_1698224676169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norsk_bert_fintuned_en_5.1.4_3.4_1698224676169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("norsk_bert_fintuned","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("norsk_bert_fintuned","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norsk_bert_fintuned| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|668.4 MB| + +## References + +https://huggingface.co/kirisums/norsk-bert-fintuned \ No newline at end of file From 9a6f2ce08ceb8c80300a4241ab9b377b1ef5332b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:00:46 +0700 Subject: [PATCH 146/301] Add model 2023-10-25-bert_sim_pair_en --- .../2023-10-25-bert_sim_pair_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sim_pair_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sim_pair_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sim_pair_en.md new file mode 100644 index 00000000000000..c3ebb2668186f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sim_pair_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_sim_pair BertForSequenceClassification from LilaBoualili +author: John Snow Labs +name: bert_sim_pair +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sim_pair` is an English model originally trained by LilaBoualili.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sim_pair_en_5.1.4_3.4_1698221374777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sim_pair_en_5.1.4_3.4_1698221374777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sim_pair","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sim_pair","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sim_pair| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/LilaBoualili/bert-sim-pair \ No newline at end of file From 678e2ef5d960b639583c535e4c661501f14f8297 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:01:46 +0700 Subject: [PATCH 147/301] Add model 2023-10-25-bert_ft_qqp_62_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_62_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_62_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_62_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_62_jeevesh8_en.md new file mode 100644 index 00000000000000..77b77be0520796 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_62_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_62_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_62_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_62_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_62_jeevesh8_en_5.1.4_3.4_1698215835889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_62_jeevesh8_en_5.1.4_3.4_1698215835889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_62_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_62_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_62_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-62 \ No newline at end of file From 7c8137bd46f22295db760400a51af154206960d9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:02:46 +0700 Subject: [PATCH 148/301] Add model 2023-10-25-bert_base_uncased_nisadibipolar_en --- ...0-25-bert_base_uncased_nisadibipolar_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_nisadibipolar_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_nisadibipolar_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_nisadibipolar_en.md new file mode 100644 index 00000000000000..53fa91cdff27dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_nisadibipolar_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_nisadibipolar BertForSequenceClassification from Joom +author: John Snow Labs +name: bert_base_uncased_nisadibipolar +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_nisadibipolar` is an English model originally trained by Joom.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_nisadibipolar_en_5.1.4_3.4_1698228126069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_nisadibipolar_en_5.1.4_3.4_1698228126069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_nisadibipolar","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_nisadibipolar","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_nisadibipolar| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Joom/bert-base-uncased-NisadiBipolar \ No newline at end of file From f849370f095e7179ea32724c449f254ccb500742 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:03:46 +0700 Subject: [PATCH 149/301] Add model 2023-10-25-samyarn_bert_base_multilingual_cased_xx --- ...samyarn_bert_base_multilingual_cased_xx.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-samyarn_bert_base_multilingual_cased_xx.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-samyarn_bert_base_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2023-10-25-samyarn_bert_base_multilingual_cased_xx.md new file mode 100644 index 00000000000000..ea42b4ff0221b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-samyarn_bert_base_multilingual_cased_xx.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Multilingual samyarn_bert_base_multilingual_cased BertForSequenceClassification from Kao +author: John Snow Labs +name: samyarn_bert_base_multilingual_cased +date: 2023-10-25 +tags: [bert, xx, open_source, sequence_classification, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`samyarn_bert_base_multilingual_cased` is a Multilingual model originally trained by Kao. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/samyarn_bert_base_multilingual_cased_xx_5.1.4_3.4_1698220388695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/samyarn_bert_base_multilingual_cased_xx_5.1.4_3.4_1698220388695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("samyarn_bert_base_multilingual_cased","xx")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("samyarn_bert_base_multilingual_cased","xx") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|samyarn_bert_base_multilingual_cased| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|667.3 MB| + +## References + +https://huggingface.co/Kao/samyarn-bert-base-multilingual-cased \ No newline at end of file From 1ebbb8d4914761974015c686f9bd1ed600dbd900 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:04:47 +0700 Subject: [PATCH 150/301] Add model 2023-10-25-bert_italian_sentiment_en --- .../2023-10-25-bert_italian_sentiment_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_italian_sentiment_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_italian_sentiment_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_sentiment_en.md new file mode 100644 index 00000000000000..6b6a48ceebb185 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_sentiment_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_italian_sentiment BertForSequenceClassification from pysentimiento +author: John Snow Labs +name: bert_italian_sentiment +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_italian_sentiment` is an English model originally trained by pysentimiento.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_italian_sentiment_en_5.1.4_3.4_1698221577302.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_italian_sentiment_en_5.1.4_3.4_1698221577302.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_sentiment","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_sentiment","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_italian_sentiment| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/pysentimiento/bert-it-sentiment \ No newline at end of file From f6edaa97b3edc7d5dcbed908af8fc3700ab54b48 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:05:47 +0700 Subject: [PATCH 151/301] Add model 2023-10-25-bert_ft_cola_92_en --- .../2023-10-25-bert_ft_cola_92_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_92_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_92_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_92_en.md new file mode 100644 index 00000000000000..399837865a7ada --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_92_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_92 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_92 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_92` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_92_en_5.1.4_3.4_1698228298225.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_92_en_5.1.4_3.4_1698228298225.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_92","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_92","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_92| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-92 \ No newline at end of file From 15e1eb9c84f7bc715da3dc38d5abcb909322b640 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:06:48 +0700 Subject: [PATCH 152/301] Add model 2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_kor_base_en --- ...eanindonli_kornli_with_bert_kor_base_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_kor_base_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_kor_base_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_kor_base_en.md new file mode 100644 index 00000000000000..afb80453f32e98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_kor_base_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_koreanindonli_kornli_with_bert_kor_base BertForSequenceClassification from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_koreanindonli_kornli_with_bert_kor_base +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_koreanindonli_kornli_with_bert_kor_base` is an English model
originally trained by muhammadravi251001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_koreanindonli_kornli_with_bert_kor_base_en_5.1.4_3.4_1698228339319.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_koreanindonli_kornli_with_bert_kor_base_en_5.1.4_3.4_1698228339319.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_koreanindonli_kornli_with_bert_kor_base","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_koreanindonli_kornli_with_bert_kor_base","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_koreanindonli_kornli_with_bert_kor_base| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|443.4 MB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-KoreanIndoNLI-KorNLI-with-bert-kor-base \ No newline at end of file From 634d6ffdf44d6484cc59e62f376e04efc06e8d49 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:07:48 +0700 Subject: [PATCH 153/301] Add model 2023-10-25-bert_mini_finetuned_qnli_en --- .../2023-10-25-bert_mini_finetuned_qnli_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qnli_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qnli_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qnli_en.md new file mode 100644 index 00000000000000..06edc3f29f8485 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qnli_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_mini_finetuned_qnli BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_mini_finetuned_qnli +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_finetuned_qnli` is an English model originally trained by M-FAC.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_qnli_en_5.1.4_3.4_1698222259511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_qnli_en_5.1.4_3.4_1698222259511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_qnli","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_qnli","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_finetuned_qnli| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/M-FAC/bert-mini-finetuned-qnli \ No newline at end of file From 83a1dab50e146e59bb490596fe92acb20bc1e374 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:08:48 +0700 Subject: [PATCH 154/301] Add model 2023-10-25-bert_ft_qqp_94_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_94_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_94_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_94_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_94_jeevesh8_en.md new file mode 100644 index 00000000000000..2482eea032a407 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_94_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_94_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_94_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_94_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_94_jeevesh8_en_5.1.4_3.4_1698222116709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_94_jeevesh8_en_5.1.4_3.4_1698222116709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_94_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_94_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_94_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-94 \ No newline at end of file From 1e0070226a08f57f87e8348c13ad073aa5dda337 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:09:48 +0700 Subject: [PATCH 155/301] Add model 2023-10-25-sentiment_hts5_hubert_hungarian_hu --- ...0-25-sentiment_hts5_hubert_hungarian_hu.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-sentiment_hts5_hubert_hungarian_hu.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-sentiment_hts5_hubert_hungarian_hu.md b/docs/_posts/ahmedlone127/2023-10-25-sentiment_hts5_hubert_hungarian_hu.md new file mode 100644 index 00000000000000..025fc5dbefc631 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-sentiment_hts5_hubert_hungarian_hu.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Hungarian sentiment_hts5_hubert_hungarian BertForSequenceClassification from NYTK +author: John Snow Labs +name: sentiment_hts5_hubert_hungarian +date: 2023-10-25 +tags: [bert, hu, open_source, sequence_classification, onnx] +task: Text Classification +language: hu +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_hts5_hubert_hungarian` is a Hungarian model originally trained by NYTK. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_hts5_hubert_hungarian_hu_5.1.4_3.4_1698228545238.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_hts5_hubert_hungarian_hu_5.1.4_3.4_1698228545238.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("sentiment_hts5_hubert_hungarian","hu")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("sentiment_hts5_hubert_hungarian","hu") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_hts5_hubert_hungarian| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|hu| +|Size:|414.7 MB| + +## References + +https://huggingface.co/NYTK/sentiment-hts5-hubert-hungarian \ No newline at end of file From c2360ab51a3dd5e1e4ab517f2fdfb32ab4660bca Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:10:49 +0700 Subject: [PATCH 156/301] Add model 2023-10-25-bert_ft_qqp_77_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_77_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_77_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_77_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_77_jeevesh8_en.md new file mode 100644 index 00000000000000..e286b0611a9ece --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_77_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_77_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_77_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_77_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_77_jeevesh8_en_5.1.4_3.4_1698218842115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_77_jeevesh8_en_5.1.4_3.4_1698218842115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_77_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_77_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_77_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-77 \ No newline at end of file From 4a1f43134422ea9c287528114870ef876590240b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:11:49 +0700 Subject: [PATCH 157/301] Add model 2023-10-25-bert_ft_qqp_38_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_38_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_38_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_38_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_38_jeevesh8_en.md new file mode 100644 index 00000000000000..66c21265c34ccb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_38_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_38_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_38_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_38_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_38_jeevesh8_en_5.1.4_3.4_1698211283214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_38_jeevesh8_en_5.1.4_3.4_1698211283214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_38_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_38_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_38_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-38 \ No newline at end of file From 0207ba89aff1be562c4767a74b7dd0dc1cbcc837 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:12:49 +0700 Subject: [PATCH 158/301] Add model 2023-10-25-bert_tweets_semeval_unclean_en --- ...23-10-25-bert_tweets_semeval_unclean_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_tweets_semeval_unclean_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_tweets_semeval_unclean_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_tweets_semeval_unclean_en.md new file mode 100644 index 00000000000000..797d04bc672bb2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_tweets_semeval_unclean_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_tweets_semeval_unclean BertForSequenceClassification from DoyyingFace +author: John Snow Labs +name: bert_tweets_semeval_unclean +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tweets_semeval_unclean` is an English model originally trained by DoyyingFace. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tweets_semeval_unclean_en_5.1.4_3.4_1698201555184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tweets_semeval_unclean_en_5.1.4_3.4_1698201555184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_tweets_semeval_unclean","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_tweets_semeval_unclean","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tweets_semeval_unclean| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/DoyyingFace/bert-tweets-semeval-unclean \ No newline at end of file From dd82a802301826312693a782e8fbbd33de6ef607 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:13:50 +0700 Subject: [PATCH 159/301] Add model 2023-10-25-bert_ft_cola_80_en --- .../2023-10-25-bert_ft_cola_80_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_80_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_80_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_80_en.md new file mode 100644 index 00000000000000..a11758a4334828 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_80_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_80 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_80 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_80` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_80_en_5.1.4_3.4_1698223321437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_80_en_5.1.4_3.4_1698223321437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_80","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_80","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_80| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-80 \ No newline at end of file From df6ceaa36c4541f5cf01f6c221e2ab0b2bbd12e3 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:14:50 +0700 Subject: [PATCH 160/301] Add model 2023-10-25-goog_bert_ft_cola_30_en --- .../2023-10-25-goog_bert_ft_cola_30_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_30_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_30_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_30_en.md new file mode 100644 index 00000000000000..6ddedc444a2eea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_30_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_30 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_30 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_30` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_30_en_5.1.4_3.4_1698219245865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_30_en_5.1.4_3.4_1698219245865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_30","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_30","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_30| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-30 \ No newline at end of file From a5c86f5e2dd5c7fda18fffed83c29f5ac9760300 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:15:50 +0700 Subject: [PATCH 161/301] Add model 2023-10-25-bert_ft_qqp_75_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_75_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_75_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_75_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_75_jeevesh8_en.md new file mode 100644 index 00000000000000..43eb52676b0869 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_75_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_75_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_75_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_75_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_75_jeevesh8_en_5.1.4_3.4_1698218407337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_75_jeevesh8_en_5.1.4_3.4_1698218407337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_75_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_75_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_75_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-75 \ No newline at end of file From f5a9d52ad1b2947de640d444afdddfe337f6cc79 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:16:51 +0700 Subject: [PATCH 162/301] Add model 2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_afaji_en --- ...sic_with_indobert_base_uncased_afaji_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_afaji_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_afaji_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_afaji_en.md new file mode 100644 index 00000000000000..c8a49a6f970488 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_afaji_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_basic_with_indobert_base_uncased_afaji BertForSequenceClassification from afaji +author: John Snow Labs +name: fine_tuned_indonli_basic_with_indobert_base_uncased_afaji +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `fine_tuned_indonli_basic_with_indobert_base_uncased_afaji` is an English model originally trained by afaji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_base_uncased_afaji_en_5.1.4_3.4_1698219271386.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_base_uncased_afaji_en_5.1.4_3.4_1698219271386.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_base_uncased_afaji","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_base_uncased_afaji","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_basic_with_indobert_base_uncased_afaji| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|413.9 MB| + +## References + +https://huggingface.co/afaji/fine-tuned-IndoNLI-Basic-with-indobert-base-uncased \ No newline at end of file From b4b91be4fe8a36cf98181ac0ca58e1676f0e19f0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:17:51 +0700 Subject: [PATCH 163/301] Add model 2023-10-25-esg_bert_sector_classifier_en --- ...023-10-25-esg_bert_sector_classifier_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-esg_bert_sector_classifier_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-esg_bert_sector_classifier_en.md b/docs/_posts/ahmedlone127/2023-10-25-esg_bert_sector_classifier_en.md new file mode 100644 index 00000000000000..9d2530e3eb06d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-esg_bert_sector_classifier_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English esg_bert_sector_classifier BertForSequenceClassification from ppsingh +author: John Snow Labs +name: esg_bert_sector_classifier +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `esg_bert_sector_classifier` is an English model originally trained by ppsingh. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esg_bert_sector_classifier_en_5.1.4_3.4_1698229025440.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esg_bert_sector_classifier_en_5.1.4_3.4_1698229025440.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("esg_bert_sector_classifier","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("esg_bert_sector_classifier","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esg_bert_sector_classifier| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/ppsingh/esg-bert-sector-classifier \ No newline at end of file From de5c2d5238c397066c3b7e6a9b6ef7330d5a94f5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:18:51 +0700 Subject: [PATCH 164/301] Add model 2023-10-25-bert_mini_finetuned_stsb_en --- .../2023-10-25-bert_mini_finetuned_stsb_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_stsb_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_stsb_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_stsb_en.md new file mode 100644 index 00000000000000..34b3b2d4bac057 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_stsb_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_mini_finetuned_stsb BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_mini_finetuned_stsb +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_finetuned_stsb` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_stsb_en_5.1.4_3.4_1698222538855.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_stsb_en_5.1.4_3.4_1698222538855.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_stsb","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_stsb","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_finetuned_stsb| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/M-FAC/bert-mini-finetuned-stsb \ No newline at end of file From 15db5b1d4806c43a4810e3308d260b5b6238a702 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:19:51 +0700 Subject: [PATCH 165/301] Add model 2023-10-25-bert_large_uncased_sst_2_16_13_smoothed_en --- ...t_large_uncased_sst_2_16_13_smoothed_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_smoothed_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_smoothed_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_smoothed_en.md new file mode 100644 index 00000000000000..3f4e8dd0f028f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_16_13_smoothed_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_large_uncased_sst_2_16_13_smoothed BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_large_uncased_sst_2_16_13_smoothed +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_sst_2_16_13_smoothed` is an English model originally trained by simonycl. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_16_13_smoothed_en_5.1.4_3.4_1698220491868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_16_13_smoothed_en_5.1.4_3.4_1698220491868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_16_13_smoothed","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_16_13_smoothed","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_sst_2_16_13_smoothed| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/simonycl/bert-large-uncased-sst-2-16-13-smoothed \ No newline at end of file From e71cf8afc530dc962c2d071b449b34bed96175bb Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:20:52 +0700 Subject: [PATCH 166/301] Add model 2023-10-25-goog_bert_ft_cola_24_en --- .../2023-10-25-goog_bert_ft_cola_24_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_24_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_24_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_24_en.md new file mode 100644 index 00000000000000..80f0e899b951eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_24_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_24 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_24 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_24` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_24_en_5.1.4_3.4_1698222516029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_24_en_5.1.4_3.4_1698222516029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_24","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_24","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_24| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-24 \ No newline at end of file From 1c1b189d74d24f55007ef455e8efaaf24319642c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:21:52 +0700 Subject: [PATCH 167/301] Add model 2023-10-25-bert_ft_cola_93_en --- .../2023-10-25-bert_ft_cola_93_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_93_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_93_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_93_en.md new file mode 100644 index 00000000000000..6aa06d646ead45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_93_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_93 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_93 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_93` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_93_en_5.1.4_3.4_1698229260438.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_93_en_5.1.4_3.4_1698229260438.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_93","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_93","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_93| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-93 \ No newline at end of file From 2281540088035097657446d0e24b18169818d07e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:22:52 +0700 Subject: [PATCH 168/301] Add model 2023-10-25-bert_ft_qqp_90_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_90_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_90_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_90_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_90_jeevesh8_en.md new file mode 100644 index 00000000000000..b87c08c17e7577 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_90_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_90_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_90_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_90_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_90_jeevesh8_en_5.1.4_3.4_1698221354382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_90_jeevesh8_en_5.1.4_3.4_1698221354382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_90_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_90_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_90_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-90 \ No newline at end of file From 6bbd4d4bff2781143ef95159ea383efc771ec349 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:23:53 +0700 Subject: [PATCH 169/301] Add model 2023-10-25-bert_mini_finetuned_qqp_en --- .../2023-10-25-bert_mini_finetuned_qqp_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qqp_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qqp_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qqp_en.md new file mode 100644 index 00000000000000..7dc7272d8455d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_mini_finetuned_qqp_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_mini_finetuned_qqp BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_mini_finetuned_qqp +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_finetuned_qqp` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_qqp_en_5.1.4_3.4_1698222353761.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_finetuned_qqp_en_5.1.4_3.4_1698222353761.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_qqp","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_mini_finetuned_qqp","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_finetuned_qqp| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/M-FAC/bert-mini-finetuned-qqp \ No newline at end of file From 7e1fbb29cffac053e436ae7220b6b9504cde7a50 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:24:54 +0700 Subject: [PATCH 170/301] Add model 2023-10-25-goog_bert_ft_cola_33_en --- .../2023-10-25-goog_bert_ft_cola_33_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_33_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_33_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_33_en.md new file mode 100644 index 00000000000000..ee7679c3eca54d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_33_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_33 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_33 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_33` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_33_en_5.1.4_3.4_1698219845008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_33_en_5.1.4_3.4_1698219845008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_33","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_33","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_33| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-33 \ No newline at end of file From 9b9741d0d5dd5e410a30a16479ec2165e5f49f89 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:25:53 +0700 Subject: [PATCH 171/301] Add model 2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_afaji_en --- ...gmented_with_indobert_large_p2_afaji_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_afaji_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_afaji_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_afaji_en.md new file mode 100644 index 00000000000000..621d35e533a7d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_large_p2_afaji_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_augmented_with_indobert_large_p2_afaji BertForSequenceClassification from afaji +author: John Snow Labs +name: fine_tuned_indonli_augmented_with_indobert_large_p2_afaji +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `fine_tuned_indonli_augmented_with_indobert_large_p2_afaji` is an English model originally trained by afaji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_large_p2_afaji_en_5.1.4_3.4_1698220859599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_large_p2_afaji_en_5.1.4_3.4_1698220859599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_large_p2_afaji","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_large_p2_afaji","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_augmented_with_indobert_large_p2_afaji| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/afaji/fine-tuned-IndoNLI-Augmented-with-indobert-large-p2 \ No newline at end of file From ed2de9710b3128a0ee1d4f26803615c7b35d5956 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:26:54 +0700 Subject: [PATCH 172/301] Add model 2023-10-25-bert_ft_cola_85_en --- .../2023-10-25-bert_ft_cola_85_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_85_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_85_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_85_en.md new file mode 100644 index 00000000000000..2c535953295c5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_85_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_85 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_85 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_85` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_85_en_5.1.4_3.4_1698224389127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_85_en_5.1.4_3.4_1698224389127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_85","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_85","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_85| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-85 \ No newline at end of file From 5da3608e6355c52dc1938228eb479eb617b744e7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:27:54 +0700 Subject: [PATCH 173/301] Add model 2023-10-25-bert_large_uncased_hoax_classifier_v1_en --- ...ert_large_uncased_hoax_classifier_v1_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_hoax_classifier_v1_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_hoax_classifier_v1_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_hoax_classifier_v1_en.md new file mode 100644 index 00000000000000..d70f7e5053f647 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_hoax_classifier_v1_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_large_uncased_hoax_classifier_v1 BertForSequenceClassification from research-dump +author: John Snow Labs +name: bert_large_uncased_hoax_classifier_v1 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_hoax_classifier_v1` is an English model originally trained by research-dump. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_hoax_classifier_v1_en_5.1.4_3.4_1698229551553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_hoax_classifier_v1_en_5.1.4_3.4_1698229551553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_hoax_classifier_v1","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_hoax_classifier_v1","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_hoax_classifier_v1| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/research-dump/bert-large-uncased_hoax_classifier_v1 \ No newline at end of file From 89f2f22fb4d4b9bae194b6be640032c6d281a6a7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:28:54 +0700 Subject: [PATCH 174/301] Add model 2023-10-25-roberta_fake_real_en --- .../2023-10-25-roberta_fake_real_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-roberta_fake_real_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-roberta_fake_real_en.md b/docs/_posts/ahmedlone127/2023-10-25-roberta_fake_real_en.md new file mode 100644 index 00000000000000..ff3001149544cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-roberta_fake_real_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English roberta_fake_real BertForSequenceClassification from PravallikaMyneni +author: John Snow Labs +name: roberta_fake_real +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_fake_real` is an English model originally trained by PravallikaMyneni. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_fake_real_en_5.1.4_3.4_1698221775747.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_fake_real_en_5.1.4_3.4_1698221775747.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("roberta_fake_real","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("roberta_fake_real","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_fake_real| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/PravallikaMyneni/roberta_fake_real \ No newline at end of file From 03f7898fba5eccdf5141264aa742e903acf40641 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:29:55 +0700 Subject: [PATCH 175/301] Add model 2023-10-25-bert_tiny_finetuned_stsb_en --- .../2023-10-25-bert_tiny_finetuned_stsb_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_stsb_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_stsb_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_stsb_en.md new file mode 100644 index 00000000000000..b625afd2dd6355 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_tiny_finetuned_stsb_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_tiny_finetuned_stsb BertForSequenceClassification from M-FAC +author: John Snow Labs +name: bert_tiny_finetuned_stsb +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_stsb` is an English model originally trained by M-FAC. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_stsb_en_5.1.4_3.4_1698223101321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_stsb_en_5.1.4_3.4_1698223101321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_stsb","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_tiny_finetuned_stsb","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_stsb| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/M-FAC/bert-tiny-finetuned-stsb \ No newline at end of file From f9c2a572b9b0975f1924152dd36ac43413eee42c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:30:55 +0700 Subject: [PATCH 176/301] Add model 2023-10-25-bert_ft_qqp_86_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_86_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_86_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_86_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_86_jeevesh8_en.md new file mode 100644 index 00000000000000..6c1a72f86b478d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_86_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_86_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_86_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_86_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_86_jeevesh8_en_5.1.4_3.4_1698220562838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_86_jeevesh8_en_5.1.4_3.4_1698220562838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_86_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_86_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_86_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-86 \ No newline at end of file From 04790af47d0eaa39cfa179812303119f3a3e02fe Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:31:55 +0700 Subject: [PATCH 177/301] Add model 2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi_fa --- ...ncased_sentiment_deepsentipers_multi_fa.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi_fa.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi_fa.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi_fa.md new file mode 100644 index 00000000000000..9feda5ef5fc54a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi_fa.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Persian bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi BertForSequenceClassification from HooshvareLab +author: John Snow Labs +name: bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi +date: 2023-10-25 +tags: [bert, fa, open_source, sequence_classification, onnx] +task: Text Classification +language: fa +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + 
+Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi_fa_5.1.4_3.4_1698210004394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi_fa_5.1.4_3.4_1698210004394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi","fa")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi","fa") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_persian_farsi_base_uncased_sentiment_deepsentipers_multi| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|fa| +|Size:|608.7 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-fa-base-uncased-sentiment-deepsentipers-multi \ No newline at end of file From 373e42668ff32c7d428e5835624d15227afae397 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:32:55 +0700 Subject: [PATCH 178/301] Add model 2023-10-25-bert_base_uncased_reviews_4_en --- ...23-10-25-bert_base_uncased_reviews_4_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_reviews_4_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_reviews_4_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_reviews_4_en.md new file mode 100644 index 00000000000000..489547802d9d39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_reviews_4_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_reviews_4 BertForSequenceClassification from insaf +author: John Snow Labs +name: bert_base_uncased_reviews_4 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_reviews_4` is an English model originally trained by insaf. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_4_en_5.1.4_3.4_1698229934185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_4_en_5.1.4_3.4_1698229934185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_reviews_4","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_reviews_4","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_reviews_4| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/insaf/bert-base-uncased-reviews-4 \ No newline at end of file From 160cb67bd988d5e526f800827612b070f53e4e73 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:33:55 +0700 Subject: [PATCH 179/301] Add model 2023-10-25-goog_bert_ft_cola_55_en --- .../2023-10-25-goog_bert_ft_cola_55_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_55_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_55_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_55_en.md new file mode 100644 index 00000000000000..d273e6a62e91d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_55_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_55 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_55 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_55` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_55_en_5.1.4_3.4_1698229934190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_55_en_5.1.4_3.4_1698229934190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_55","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_55","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_55| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-55 \ No newline at end of file From 792d4b52d94a49155745404b42714ccfcc0db9d9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:34:56 +0700 Subject: [PATCH 180/301] Add model 2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001_en --- ...indobert_large_p2_muhammadravi251001_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001_en.md new file mode 100644 index 00000000000000..4fc489ec386c5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001 BertForSequenceClassification from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide 
 scalability and production-readiness using Spark NLP. `fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001` is an English model originally trained by muhammadravi251001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001_en_5.1.4_3.4_1698224046216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001_en_5.1.4_3.4_1698224046216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_basic_with_indobert_large_p2_muhammadravi251001| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-IndoNLI-Basic-with-indobert-large-p2 \ No newline at end of file From 5c5a92c92da8572d06f860ec89e59ce6af2931d4 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:35:56 +0700 Subject: [PATCH 181/301] Add model 2023-10-25-bert_ft_cola_94_en --- .../2023-10-25-bert_ft_cola_94_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_94_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_94_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_94_en.md new file mode 100644 index 00000000000000..b42bb56a7fe310 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_94_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_94 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_94 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_94` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_94_en_5.1.4_3.4_1698230141395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_94_en_5.1.4_3.4_1698230141395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_94","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_94","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_94| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-94 \ No newline at end of file From b1a28c8e14e6191d7dca1715d163ec84a50761e9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:36:56 +0700 Subject: [PATCH 182/301] Add model 2023-10-25-bert_large_uncased_sst_2_32_13_30_en --- ...25-bert_large_uncased_sst_2_32_13_30_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_32_13_30_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_32_13_30_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_32_13_30_en.md new file mode 100644 index 00000000000000..10834a73e981af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_sst_2_32_13_30_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_large_uncased_sst_2_32_13_30 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_large_uncased_sst_2_32_13_30 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_sst_2_32_13_30` is an English model originally trained by simonycl. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_32_13_30_en_5.1.4_3.4_1698225008323.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sst_2_32_13_30_en_5.1.4_3.4_1698225008323.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_32_13_30","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_sst_2_32_13_30","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_sst_2_32_13_30| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/simonycl/bert-large-uncased-sst-2-32-13-30 \ No newline at end of file From f7f25371afec485245cd6d7b44f27ec6cacdfba7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:37:56 +0700 Subject: [PATCH 183/301] Add model 2023-10-25-bert_ft_qqp_42_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_42_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_42_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_42_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_42_jeevesh8_en.md new file mode 100644 index 00000000000000..6e8cd8ba73b5c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_42_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_42_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_42_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_42_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_42_jeevesh8_en_5.1.4_3.4_1698212046210.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_42_jeevesh8_en_5.1.4_3.4_1698212046210.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_42_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_42_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_42_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-42 \ No newline at end of file From 10e048a7e76b739bd767c50f898778d3cb31078e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:38:56 +0700 Subject: [PATCH 184/301] Add model 2023-10-25-bert_italian_irony_en --- .../2023-10-25-bert_italian_irony_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_italian_irony_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_italian_irony_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_irony_en.md new file mode 100644 index 00000000000000..8ecdc13462ecf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_irony_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_italian_irony BertForSequenceClassification from pysentimiento +author: John Snow Labs +name: bert_italian_irony +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_italian_irony` is an English model originally trained by pysentimiento. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_italian_irony_en_5.1.4_3.4_1698222335196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_italian_irony_en_5.1.4_3.4_1698222335196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_irony","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_irony","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_italian_irony| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/pysentimiento/bert-it-irony \ No newline at end of file From aabf76e0e5068405d3e3b4bbc3c83a74cbff3976 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:39:57 +0700 Subject: [PATCH 185/301] Add model 2023-10-25-bert_large_uncased_cola_b_en --- ...2023-10-25-bert_large_uncased_cola_b_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_cola_b_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_cola_b_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_cola_b_en.md new file mode 100644 index 00000000000000..4a7a2833858a43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_cola_b_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_large_uncased_cola_b BertForSequenceClassification from EhsanAghazadeh +author: John Snow Labs +name: bert_large_uncased_cola_b +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_cola_b` is an English model originally trained by EhsanAghazadeh.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_cola_b_en_5.1.4_3.4_1698204080875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_cola_b_en_5.1.4_3.4_1698204080875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_cola_b","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_cola_b","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_cola_b| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/EhsanAghazadeh/bert-large-uncased-CoLA_B \ No newline at end of file From c1e9386f7f186d1d903f53813496ba619289d696 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:40:57 +0700 Subject: [PATCH 186/301] Add model 2023-10-25-bert_ft_qqp_96_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_96_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_96_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_96_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_96_jeevesh8_en.md new file mode 100644 index 00000000000000..6a4460720fd2e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_96_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_96_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_96_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_96_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_96_jeevesh8_en_5.1.4_3.4_1698222552917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_96_jeevesh8_en_5.1.4_3.4_1698222552917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_96_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_96_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_96_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-96 \ No newline at end of file From 402b7bba98cdc9ba3214691e4b77201e65c284e2 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:41:57 +0700 Subject: [PATCH 187/301] Add model 2023-10-25-bert_base_mdoc_bm25_en --- .../2023-10-25-bert_base_mdoc_bm25_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_bm25_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_bm25_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_bm25_en.md new file mode 100644 index 00000000000000..8d013fc00ce1ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_bm25_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_mdoc_bm25 BertForSequenceClassification from Luyu +author: John Snow Labs +name: bert_base_mdoc_bm25 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_mdoc_bm25` is an English model originally trained by Luyu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_mdoc_bm25_en_5.1.4_3.4_1698221725395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_mdoc_bm25_en_5.1.4_3.4_1698221725395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_mdoc_bm25","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_mdoc_bm25","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_mdoc_bm25| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Luyu/bert-base-mdoc-bm25 \ No newline at end of file From 18e48047278c61ac12170c41a7ab57ab7c4afa19 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:42:57 +0700 Subject: [PATCH 188/301] Add model 2023-10-25-bert_ft_cola_86_en --- .../2023-10-25-bert_ft_cola_86_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_86_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_86_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_86_en.md new file mode 100644 index 00000000000000..04c994e1e652fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_86_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_86 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_86 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_86` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_86_en_5.1.4_3.4_1698224587586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_86_en_5.1.4_3.4_1698224587586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_86","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_86","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_86| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-86 \ No newline at end of file From 89e1aeb5f4234b61035fe54d8fe7238a923f1f14 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:43:57 +0700 Subject: [PATCH 189/301] Add model 2023-10-25-bert_ft_qqp_84_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_84_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_84_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_84_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_84_jeevesh8_en.md new file mode 100644 index 00000000000000..db8311cb113793 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_84_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_84_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_84_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_84_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_84_jeevesh8_en_5.1.4_3.4_1698220142773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_84_jeevesh8_en_5.1.4_3.4_1698220142773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_84_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_84_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_84_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-84 \ No newline at end of file From de75f3fd0b930f39d4c9d13a7ff0ad76f653c9d5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:44:58 +0700 Subject: [PATCH 190/301] Add model 2023-10-25-goog_bert_ft_cola_9_en --- .../2023-10-25-goog_bert_ft_cola_9_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_9_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_9_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_9_en.md new file mode 100644 index 00000000000000..f63cd5d3f30d5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_9_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_9 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_9 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_9` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_9_en_5.1.4_3.4_1698215840044.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_9_en_5.1.4_3.4_1698215840044.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_9","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_9","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_9| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-9 \ No newline at end of file From 5874c3f8a7ba8d9197be7a1c51eff78f5e4537be Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:45:58 +0700 Subject: [PATCH 191/301] Add model 2023-10-25-bert_base_multilingual_cased_mrpc_glue_xx --- ...rt_base_multilingual_cased_mrpc_glue_xx.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_multilingual_cased_mrpc_glue_xx.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_multilingual_cased_mrpc_glue_xx.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_multilingual_cased_mrpc_glue_xx.md new file mode 100644 index 00000000000000..1a2fecef6fe686 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_multilingual_cased_mrpc_glue_xx.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_mrpc_glue BertForSequenceClassification from rriverar75 +author: John Snow Labs +name: bert_base_multilingual_cased_mrpc_glue +date: 2023-10-25 +tags: [bert, xx, open_source, sequence_classification, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_multilingual_cased_mrpc_glue` is a Multilingual model originally trained by rriverar75.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_mrpc_glue_xx_5.1.4_3.4_1698230620198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_mrpc_glue_xx_5.1.4_3.4_1698230620198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_multilingual_cased_mrpc_glue","xx")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_multilingual_cased_mrpc_glue","xx") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_mrpc_glue| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|667.3 MB| + +## References + +https://huggingface.co/rriverar75/bert-base-multilingual-cased-mrpc-glue \ No newline at end of file From 54fdae135d2c5dd0beb08de4b5991ce5cc7e4e1d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:46:58 +0700 Subject: [PATCH 192/301] Add model 2023-10-25-goog_bert_ft_cola_54_en --- .../2023-10-25-goog_bert_ft_cola_54_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_54_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_54_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_54_en.md new file mode 100644 index 00000000000000..2390033ee09458 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_54_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_54 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_54 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_54` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_54_en_5.1.4_3.4_1698230803166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_54_en_5.1.4_3.4_1698230803166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_54","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_54","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_54| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-54 \ No newline at end of file From 7a265fa834696c831050be2dacea2acd71ee9dda Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:47:58 +0700 Subject: [PATCH 193/301] Add model 2023-10-25-bert_finetuned_winogrande_en --- ...2023-10-25-bert_finetuned_winogrande_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_winogrande_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_winogrande_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_winogrande_en.md new file mode 100644 index 00000000000000..da788c8034df82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuned_winogrande_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_finetuned_winogrande BertForSequenceClassification from Kalslice +author: John Snow Labs +name: bert_finetuned_winogrande +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuned_winogrande` is an English model originally trained by Kalslice.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_winogrande_en_5.1.4_3.4_1698230827620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_winogrande_en_5.1.4_3.4_1698230827620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuned_winogrande","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuned_winogrande","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_winogrande| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Kalslice/bert-finetuned-winogrande \ No newline at end of file From 2c212dec3e4dff9bd8d35ad57f557ed4df38d3ae Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:48:59 +0700 Subject: [PATCH 194/301] Add model 2023-10-25-bert_fakenews_en --- .../2023-10-25-bert_fakenews_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_fakenews_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_fakenews_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_fakenews_en.md new file mode 100644 index 00000000000000..12621cd2dc0b79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_fakenews_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_fakenews BertForSequenceClassification from JKKANG +author: John Snow Labs +name: bert_fakenews +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_fakenews` is an English model originally trained by JKKANG.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fakenews_en_5.1.4_3.4_1698221382925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fakenews_en_5.1.4_3.4_1698221382925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_fakenews","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_fakenews","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fakenews| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|414.6 MB| + +## References + +https://huggingface.co/JKKANG/bert-fakenews \ No newline at end of file From fd3b24026a53c6a65faf97d034b744be549ceb08 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:49:59 +0700 Subject: [PATCH 195/301] Add model 2023-10-25-bert_ft_cola_95_en --- .../2023-10-25-bert_ft_cola_95_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_95_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_95_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_95_en.md new file mode 100644 index 00000000000000..429efe7c6a856a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_95_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_95 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_95 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_95` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_95_en_5.1.4_3.4_1698230908873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_95_en_5.1.4_3.4_1698230908873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_95","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_95","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_95| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-95 \ No newline at end of file From f7d5cea68db5f1748387e4c1532e1f31ad3ebcb0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:50:59 +0700 Subject: [PATCH 196/301] Add model 2023-10-25-goog_bert_ft_cola_57_en --- .../2023-10-25-goog_bert_ft_cola_57_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_57_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_57_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_57_en.md new file mode 100644 index 00000000000000..6333ebd3b81807 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_57_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_57 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_57 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_57` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_57_en_5.1.4_3.4_1698228126156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_57_en_5.1.4_3.4_1698228126156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_57","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_57","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_57| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-57 \ No newline at end of file From e49f4ab491388f44d79d7e45e1b387abd11e9410 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:52:00 +0700 Subject: [PATCH 197/301] Add model 2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001_en --- ...bert_base_uncased_muhammadravi251001_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001_en.md new file mode 100644 index 00000000000000..99fe6e43b50a22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001 BertForSequenceClassification from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification 
 model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001` is an English model originally trained by muhammadravi251001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001_en_5.1.4_3.4_1698225941984.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001_en_5.1.4_3.4_1698225941984.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_augmented_with_indobert_base_uncased_muhammadravi251001| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|413.9 MB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-IndoNLI-Augmented-with-indobert-base-uncased \ No newline at end of file From 5a57a323ebc47c2403142787d53f65ec352b3802 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:53:00 +0700 Subject: [PATCH 198/301] Add model 2023-10-25-bert_base_mdoc_hdct_en --- .../2023-10-25-bert_base_mdoc_hdct_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_hdct_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_hdct_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_hdct_en.md new file mode 100644 index 00000000000000..f23947e624bdd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_mdoc_hdct_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_mdoc_hdct BertForSequenceClassification from Luyu +author: John Snow Labs +name: bert_base_mdoc_hdct +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_mdoc_hdct` is an English model originally trained by Luyu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_mdoc_hdct_en_5.1.4_3.4_1698221922423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_mdoc_hdct_en_5.1.4_3.4_1698221922423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_mdoc_hdct","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_mdoc_hdct","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_mdoc_hdct| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Luyu/bert-base-mdoc-hdct \ No newline at end of file From 4aaf3affacdf8c26747fda90d2d094df7e4542fa Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:54:00 +0700 Subject: [PATCH 199/301] Add model 2023-10-25-bert_ft_qqp_98_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_98_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_98_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_98_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_98_jeevesh8_en.md new file mode 100644 index 00000000000000..a26be80333e374 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_98_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_98_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_98_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_98_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_98_jeevesh8_en_5.1.4_3.4_1698222930954.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_98_jeevesh8_en_5.1.4_3.4_1698222930954.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_98_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_98_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_98_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-98 \ No newline at end of file From 2e9a11ce60a4a7e12823365cc0f987adb97881c8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:55:01 +0700 Subject: [PATCH 200/301] Add model 2023-10-25-bert_sequce_classifier_paraphrase_en --- ...25-bert_sequce_classifier_paraphrase_en.md | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequce_classifier_paraphrase_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequce_classifier_paraphrase_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequce_classifier_paraphrase_en.md new file mode 100644 index 00000000000000..01140afc54eaa6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequce_classifier_paraphrase_en.md @@ -0,0 +1,100 @@ +--- +layout: model +title: English BertForSequenceClassification Cased model (from Prompsit) +author: John Snow Labs +name: bert_sequce_classifier_paraphrase +date: 2023-10-25 +tags: [en, open_source, bert, sequence_classification, ner, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `paraphrase-bert-en` is an English model originally trained by `Prompsit`.
+ +## Predicted Entities + +`Not Paraphrase`, `Paraphrase` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequce_classifier_paraphrase_en_5.1.4_3.4_1698231258424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequce_classifier_paraphrase_en_5.1.4_3.4_1698231258424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequce_classifier_paraphrase","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequce_classifier_paraphrase","en") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequce_classifier_paraphrase| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|409.4 MB| +|Case sensitive:|true| +|Max sentence length:|128| + +## References + +References + +- https://huggingface.co/Prompsit/paraphrase-bert-en \ No newline at end of file From f0104df679655e1f99b09e9a797581761dc081ec Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:56:01 +0700 Subject: [PATCH 201/301] Add model 2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22_en --- ...ews_market_overview_open_ssec_f1_v22_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22_en.md b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22_en.md new file mode 100644 index 00000000000000..3cff0c16472972 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover 
+use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22_en_5.1.4_3.4_1698223114906.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22_en_5.1.4_3.4_1698223114906.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mengzi_bert_base_fin_wallstreetcn_morning_news_market_overview_open_ssec_f1_v22| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.1 MB| + +## References + +https://huggingface.co/hw2942/mengzi-bert-base-fin-wallstreetcn-morning-news-market-overview-open-SSEC-f1-v22 \ No newline at end of file From 2956179a5ee9b67bb2be57dc14068bb4b469dd69 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:57:01 +0700 Subject: [PATCH 202/301] Add model 2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala_fa --- ...arsi_base_uncased_sentiment_digikala_fa.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala_fa.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala_fa.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala_fa.md new file mode 100644 index 00000000000000..343b06b8e5ed38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala_fa.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Persian bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala BertForSequenceClassification from HooshvareLab +author: John Snow Labs +name: bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala +date: 2023-10-25 +tags: [bert, fa, open_source, sequence_classification, onnx] +task: Text Classification +language: fa +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: 
 "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala_fa_5.1.4_3.4_1698210264996.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala_fa_5.1.4_3.4_1698210264996.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala","fa")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala","fa") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_persian_farsi_base_uncased_sentiment_digikala| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|fa| +|Size:|608.7 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-fa-base-uncased-sentiment-digikala \ No newline at end of file From ff86a416c5ab34a0cf9cada62b842d25f8a204de Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:58:01 +0700 Subject: [PATCH 203/301] Add model 2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001_en --- ...bert_base_uncased_muhammadravi251001_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001_en.md new file mode 100644 index 00000000000000..ff7efc7423853a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001 BertForSequenceClassification from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + 
+Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001` is an English model originally trained by muhammadravi251001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001_en_5.1.4_3.4_1698225044555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001_en_5.1.4_3.4_1698225044555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_indonli_basic_with_indobert_base_uncased_muhammadravi251001| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|413.9 MB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-IndoNLI-Basic-with-indobert-base-uncased \ No newline at end of file From 5c40dfaa0c8ce52f2c36e64718133af43591d7f4 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 17:59:01 +0700 Subject: [PATCH 204/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1_en --- ..._morning_news_market_overview_ssec_1_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1_en.md new file mode 100644 index 00000000000000..1c9ac7f7c442c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained 
BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1_en_5.1.4_3.4_1698231415062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1_en_5.1.4_3.4_1698231415062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_1| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-1 \ No newline at end of file From 43104e79232dd4a83159f5de1895bfa9df9d1a5f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:00:02 +0700 Subject: [PATCH 205/301] Add model 2023-10-25-goog_bert_ft_cola_40_en --- .../2023-10-25-goog_bert_ft_cola_40_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_40_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_40_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_40_en.md new file mode 100644 index 00000000000000..cb7ef6fdf253d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_40_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_40 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_40 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_40` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_40_en_5.1.4_3.4_1698221167152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_40_en_5.1.4_3.4_1698221167152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_40","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_40","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_40| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-40 \ No newline at end of file From b3784ae36ced60ae3f63fe99c8e6b8be71fe45a3 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:01:02 +0700 Subject: [PATCH 206/301] Add model 2023-10-25-goog_bert_ft_cola_50_en --- .../2023-10-25-goog_bert_ft_cola_50_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_50_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_50_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_50_en.md new file mode 100644 index 00000000000000..cd94e000f51da6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_50_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_50 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_50 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_50` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_50_en_5.1.4_3.4_1698231600698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_50_en_5.1.4_3.4_1698231600698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_50","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_50","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_50| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-50 \ No newline at end of file From 5c977ffdc697800298caeeac34f28732ab77368f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:02:02 +0700 Subject: [PATCH 207/301] Add model 2023-10-25-goog_bert_ft_cola_12_en --- .../2023-10-25-goog_bert_ft_cola_12_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_12_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_12_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_12_en.md new file mode 100644 index 00000000000000..d407570ebbe125 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_12_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_12 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_12 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_12` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_12_en_5.1.4_3.4_1698216256946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_12_en_5.1.4_3.4_1698216256946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_12","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_12","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_12| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-12 \ No newline at end of file From 787405a91fe916e121c06424d86ae20d5b233255 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:03:03 +0700 Subject: [PATCH 208/301] Add model 2023-10-25-bert_base_uncased_finetuned_hateful_meme_en --- ..._base_uncased_finetuned_hateful_meme_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_hateful_meme_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_hateful_meme_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_hateful_meme_en.md new file mode 100644 index 00000000000000..981f61563c0a60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_hateful_meme_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_hateful_meme BertForSequenceClassification from tommilyjones +author: John Snow Labs +name: bert_base_uncased_finetuned_hateful_meme +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_hateful_meme` is an English model originally trained by tommilyjones.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_hateful_meme_en_5.1.4_3.4_1698218460479.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_hateful_meme_en_5.1.4_3.4_1698218460479.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_hateful_meme","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_hateful_meme","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_hateful_meme| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/tommilyjones/bert-base-uncased-finetuned-hateful-meme \ No newline at end of file From 15fc1fa81f9a78891dd861c69b5122cdeb2123c8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:04:03 +0700 Subject: [PATCH 209/301] Add model 2023-10-25-bert_ft_cola_96_en --- .../2023-10-25-bert_ft_cola_96_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_96_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_96_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_96_en.md new file mode 100644 index 00000000000000..ad2da76485c85f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_96_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_96 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_96 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_96` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_96_en_5.1.4_3.4_1698231828253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_96_en_5.1.4_3.4_1698231828253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_96","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_96","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_96| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-96 \ No newline at end of file From f1ce5f8dae213d946f699ed4c072028a1712ee45 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:05:03 +0700 Subject: [PATCH 210/301] Add model 2023-10-25-bert_ft_qqp_72_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_72_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_72_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_72_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_72_jeevesh8_en.md new file mode 100644 index 00000000000000..825dee7b6c2872 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_72_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_72_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_72_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_72_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_72_jeevesh8_en_5.1.4_3.4_1698217803909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_72_jeevesh8_en_5.1.4_3.4_1698217803909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_72_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_72_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_72_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-72 \ No newline at end of file From 998ad6a78fba59bcd0460bde65febe915c0d3eac Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:06:03 +0700 Subject: [PATCH 211/301] Add model 2023-10-25-goog_bert_ft_cola_22_en --- .../2023-10-25-goog_bert_ft_cola_22_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_22_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_22_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_22_en.md new file mode 100644 index 00000000000000..b40c94252c46a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_22_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_22 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_22 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_22` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_22_en_5.1.4_3.4_1698217792710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_22_en_5.1.4_3.4_1698217792710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_22","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_22","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_22| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-22 \ No newline at end of file From 7075e050596a8e114b24810580f2347071acb649 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:07:04 +0700 Subject: [PATCH 212/301] Add model 2023-10-25-bert_wikipedia_sst2_en --- .../2023-10-25-bert_wikipedia_sst2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_wikipedia_sst2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_wikipedia_sst2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_wikipedia_sst2_en.md new file mode 100644 index 00000000000000..0decb892f3ea53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_wikipedia_sst2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_wikipedia_sst2 BertForSequenceClassification from deepesh0x +author: John Snow Labs +name: bert_wikipedia_sst2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_wikipedia_sst2` is an English model originally trained by deepesh0x.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_wikipedia_sst2_en_5.1.4_3.4_1698211274392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_wikipedia_sst2_en_5.1.4_3.4_1698211274392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_wikipedia_sst2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_wikipedia_sst2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_wikipedia_sst2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/deepesh0x/bert_wikipedia_sst2 \ No newline at end of file From 81ab065022f52d17ed7e973efe1348881311d2b7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:08:04 +0700 Subject: [PATCH 213/301] Add model 2023-10-25-goog_bert_ft_cola_26_en --- .../2023-10-25-goog_bert_ft_cola_26_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_26_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_26_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_26_en.md new file mode 100644 index 00000000000000..5ad2b7a3ce6c60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_26_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_26 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_26 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_26` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_26_en_5.1.4_3.4_1698219644965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_26_en_5.1.4_3.4_1698219644965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_26","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_26","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_26| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-26 \ No newline at end of file From 2f2b2d09084d8bd4622a1b89ce52a31dc4321728 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:09:04 +0700 Subject: [PATCH 214/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2_en --- ..._morning_news_market_overview_ssec_2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2_en.md new file mode 100644 index 00000000000000..f37188738f45cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and 
production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2_en_5.1.4_3.4_1698232117390.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2_en_5.1.4_3.4_1698232117390.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-2 \ No newline at end of file From e7be650f098c642d9589d29ab51e2e397cb23d80 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:10:05 +0700 Subject: [PATCH 215/301] Add model 2023-10-25-bert_deepfake_bulgarian_multiclass_bg --- ...5-bert_deepfake_bulgarian_multiclass_bg.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_deepfake_bulgarian_multiclass_bg.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_deepfake_bulgarian_multiclass_bg.md b/docs/_posts/ahmedlone127/2023-10-25-bert_deepfake_bulgarian_multiclass_bg.md new file mode 100644 index 00000000000000..e73f8e0fd9bfe6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_deepfake_bulgarian_multiclass_bg.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Bulgarian bert_deepfake_bulgarian_multiclass BertForSequenceClassification from usmiva +author: John Snow Labs +name: bert_deepfake_bulgarian_multiclass +date: 2023-10-25 +tags: [bert, bg, open_source, sequence_classification, onnx] +task: Text Classification +language: bg +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_deepfake_bulgarian_multiclass` is a Bulgarian model originally trained by usmiva. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_deepfake_bulgarian_multiclass_bg_5.1.4_3.4_1698212606456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_deepfake_bulgarian_multiclass_bg_5.1.4_3.4_1698212606456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_deepfake_bulgarian_multiclass","bg")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_deepfake_bulgarian_multiclass","bg") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_deepfake_bulgarian_multiclass| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|bg| +|Size:|409.1 MB| + +## References + +https://huggingface.co/usmiva/bert-deepfake-bg-multiclass \ No newline at end of file From 324b44980ab68df94391f2eb86575ea855f77f67 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:11:05 +0700 Subject: [PATCH 216/301] Add model 2023-10-25-bert_ft_qqp_52_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_52_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_52_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_52_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_52_jeevesh8_en.md new file mode 100644 index 00000000000000..cfc8b90c280ca1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_52_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_52_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_52_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_52_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_52_jeevesh8_en_5.1.4_3.4_1698213920269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_52_jeevesh8_en_5.1.4_3.4_1698213920269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_52_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_52_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_52_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-52 \ No newline at end of file From 89b99f33e73bab4a18ee79f3082f3a27ab9be823 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:12:05 +0700 Subject: [PATCH 217/301] Add model 2023-10-25-bert_ft_qqp_56_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_56_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_56_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_56_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_56_jeevesh8_en.md new file mode 100644 index 00000000000000..c733153ce00af4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_56_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_56_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_56_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_56_jeevesh8` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_56_jeevesh8_en_5.1.4_3.4_1698214624241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_56_jeevesh8_en_5.1.4_3.4_1698214624241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_56_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_56_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_56_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-56 \ No newline at end of file From 1b2e16e83ab03d84f6b99a477e8fffe3853b35db Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:13:06 +0700 Subject: [PATCH 218/301] Add model 2023-10-25-goog_bert_ft_cola_49_en --- .../2023-10-25-goog_bert_ft_cola_49_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_49_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_49_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_49_en.md new file mode 100644 index 00000000000000..9f0c57140f6d0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_49_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_49 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_49 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_49` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_49_en_5.1.4_3.4_1698229026818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_49_en_5.1.4_3.4_1698229026818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_49","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_49","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_49| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-49 \ No newline at end of file From 604ef66119f2a173152ff110c13cfca4be79f095 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:14:06 +0700 Subject: [PATCH 219/301] Add model 2023-10-25-goog_bert_ft_cola_64_en --- .../2023-10-25-goog_bert_ft_cola_64_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_64_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_64_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_64_en.md new file mode 100644 index 00000000000000..fc8b6cad2d4af7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_64_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_64 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_64 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_64` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_64_en_5.1.4_3.4_1698232386985.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_64_en_5.1.4_3.4_1698232386985.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_64","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_64","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_64| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-64 \ No newline at end of file From 9b6489caba89ea78a305b0830c73822dc480cad1 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:15:06 +0700 Subject: [PATCH 220/301] Add model 2023-10-25-indobertnewstest_rizalmilyardi_en --- ...10-25-indobertnewstest_rizalmilyardi_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-indobertnewstest_rizalmilyardi_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-indobertnewstest_rizalmilyardi_en.md b/docs/_posts/ahmedlone127/2023-10-25-indobertnewstest_rizalmilyardi_en.md new file mode 100644 index 00000000000000..461c447d6d40dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-indobertnewstest_rizalmilyardi_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English indobertnewstest_rizalmilyardi BertForSequenceClassification from rizalmilyardi +author: John Snow Labs +name: indobertnewstest_rizalmilyardi +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indobertnewstest_rizalmilyardi` is an English model originally trained by rizalmilyardi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobertnewstest_rizalmilyardi_en_5.1.4_3.4_1698211956208.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobertnewstest_rizalmilyardi_en_5.1.4_3.4_1698211956208.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("indobertnewstest_rizalmilyardi","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("indobertnewstest_rizalmilyardi","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobertnewstest_rizalmilyardi| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|414.0 MB| + +## References + +https://huggingface.co/rizalmilyardi/IndobertNewsTest \ No newline at end of file From e07b2c1e91e46c0ab5232f63691a9b510114ded5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:16:06 +0700 Subject: [PATCH 221/301] Add model 2023-10-25-a01_suicide_bert_huggingface_finetune_en --- ...01_suicide_bert_huggingface_finetune_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-a01_suicide_bert_huggingface_finetune_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-a01_suicide_bert_huggingface_finetune_en.md b/docs/_posts/ahmedlone127/2023-10-25-a01_suicide_bert_huggingface_finetune_en.md new file mode 100644 index 00000000000000..b6f2767b0f2302 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-a01_suicide_bert_huggingface_finetune_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English a01_suicide_bert_huggingface_finetune BertForSequenceClassification from Ariffnaz +author: John Snow Labs +name: a01_suicide_bert_huggingface_finetune +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `a01_suicide_bert_huggingface_finetune` is an English model originally trained by Ariffnaz.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/a01_suicide_bert_huggingface_finetune_en_5.1.4_3.4_1698231600777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/a01_suicide_bert_huggingface_finetune_en_5.1.4_3.4_1698231600777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("a01_suicide_bert_huggingface_finetune","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("a01_suicide_bert_huggingface_finetune","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|a01_suicide_bert_huggingface_finetune| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Ariffnaz/a01-suicide-bert-huggingface-finetune \ No newline at end of file From 791c2fbe07f8729b1294b725412c52ff6f877bf8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:17:06 +0700 Subject: [PATCH 222/301] Add model 2023-10-25-bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1_ar --- ...rabic_dialect_identification_model_1_ar.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1_ar.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1_ar.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1_ar.md new file mode 100644 index 00000000000000..9e7c77824e37aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1_ar.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Arabic bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1 BertForSequenceClassification from Abdelrahman-Rezk +author: John Snow Labs +name: bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1 +date: 2023-10-25 +tags: [bert, ar, open_source, sequence_classification, onnx] +task: Text Classification +language: ar +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## 
Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1` is an Arabic model originally trained by Abdelrahman-Rezk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1_ar_5.1.4_3.4_1698213574208.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1_ar_5.1.4_3.4_1698213574208.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1","ar")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1","ar") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabic_camelbert_msa_finetuned_arabic_dialect_identification_model_1| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|ar| +|Size:|408.6 MB| + +## References + +https://huggingface.co/Abdelrahman-Rezk/bert-base-arabic-camelbert-msa-finetuned-Arabic_Dialect_Identification_model_1 \ No newline at end of file From f667768313ecff27e118b680a72ca38e19da151c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:18:07 +0700 Subject: [PATCH 223/301] Add model 2023-10-25-bert_base_uncased_sst_2_16_13_30_en --- ...-25-bert_base_uncased_sst_2_16_13_30_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_13_30_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_13_30_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_13_30_en.md new file mode 100644 index 00000000000000..e23331bdfb588a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_sst_2_16_13_30_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_sst_2_16_13_30 BertForSequenceClassification from simonycl +author: John Snow Labs +name: bert_base_uncased_sst_2_16_13_30 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_sst_2_16_13_30` is a English model originally trained by 
simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_16_13_30_en_5.1.4_3.4_1698222121181.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sst_2_16_13_30_en_5.1.4_3.4_1698222121181.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_16_13_30","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_sst_2_16_13_30","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sst_2_16_13_30| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/simonycl/bert-base-uncased-sst-2-16-13-30 \ No newline at end of file From 6d9a6a4944e9ae1e28828ebb932c57d9359709ee Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:19:07 +0700 Subject: [PATCH 224/301] Add model 2023-10-25-bert_ft_cola_97_en --- .../2023-10-25-bert_ft_cola_97_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_97_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_97_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_97_en.md new file mode 100644 index 00000000000000..3fe7a26782da02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_97_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_97 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_97 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_cola_97` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_97_en_5.1.4_3.4_1698232593733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_97_en_5.1.4_3.4_1698232593733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_97","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_97","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_97| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-97 \ No newline at end of file From 5fde0b4e019017bb8b816dd7e3577beb9c46597e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:20:07 +0700 Subject: [PATCH 225/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3_en --- ..._morning_news_market_overview_ssec_3_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3_en.md new file mode 100644 index 00000000000000..d68f331ff6c819 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and 
production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3_en_5.1.4_3.4_1698232786971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3_en_5.1.4_3.4_1698232786971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_3| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-3 \ No newline at end of file From 31bc1675339ed6350da0fa2465140927cbad393c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:21:08 +0700 Subject: [PATCH 226/301] Add model 2023-10-25-bert_ft_qqp_45_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_45_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_45_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_45_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_45_jeevesh8_en.md new file mode 100644 index 00000000000000..76a4bbff36664a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_45_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_45_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_45_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ft_qqp_45_jeevesh8` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_45_jeevesh8_en_5.1.4_3.4_1698212646870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_45_jeevesh8_en_5.1.4_3.4_1698212646870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_45_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_45_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_45_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-45 \ No newline at end of file From 393999a3cd762a1bd66395f11249e38a7f746acd Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:22:08 +0700 Subject: [PATCH 227/301] Add model 2023-10-25-goog_bert_ft_cola_32_en --- .../2023-10-25-goog_bert_ft_cola_32_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_32_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_32_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_32_en.md new file mode 100644 index 00000000000000..64ea0587bf2874 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_32_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_32 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_32 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_32` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_32_en_5.1.4_3.4_1698219435732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_32_en_5.1.4_3.4_1698219435732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_32","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_32","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_32| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-32 \ No newline at end of file From 617bad35282cc11c3ae1aea7d9a77085669f050e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:23:08 +0700 Subject: [PATCH 228/301] Add model 2023-10-25-goog_bert_ft_cola_74_en --- .../2023-10-25-goog_bert_ft_cola_74_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_74_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_74_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_74_en.md new file mode 100644 index 00000000000000..2d4bc553a6b360 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_74_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_74 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_74 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_74` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_74_en_5.1.4_3.4_1698223397781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_74_en_5.1.4_3.4_1698223397781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_74","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_74","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_74| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-74 \ No newline at end of file From 87ad5cc74c605e1bd1f6e23c61d0d30b9df3ce8e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:24:09 +0700 Subject: [PATCH 229/301] Add model 2023-10-25-norbert2_sentiment_norec_2_en --- ...023-10-25-norbert2_sentiment_norec_2_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_2_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_2_en.md b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_2_en.md new file mode 100644 index 00000000000000..0485e80e906c86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_2_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English norbert2_sentiment_norec_2 BertForSequenceClassification from NTCAL +author: John Snow Labs +name: norbert2_sentiment_norec_2 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norbert2_sentiment_norec_2` is a English model originally trained by NTCAL. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_2_en_5.1.4_3.4_1698233003739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_2_en_5.1.4_3.4_1698233003739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_2","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_2","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2_sentiment_norec_2| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/NTCAL/norbert2_sentiment_norec_2 \ No newline at end of file From 47777075b3ef6f0e0e8db51b60ff74259c4b867a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:25:09 +0700 Subject: [PATCH 230/301] Add model 2023-10-25-biobert_finetuned_genetic_mutation_en --- ...5-biobert_finetuned_genetic_mutation_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-biobert_finetuned_genetic_mutation_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-biobert_finetuned_genetic_mutation_en.md b/docs/_posts/ahmedlone127/2023-10-25-biobert_finetuned_genetic_mutation_en.md new file mode 100644 index 00000000000000..9a1ce98a172d31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-biobert_finetuned_genetic_mutation_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English biobert_finetuned_genetic_mutation BertForSequenceClassification from wahdan99 +author: John Snow Labs +name: biobert_finetuned_genetic_mutation +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_finetuned_genetic_mutation` is a English model originally trained by wahdan99. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_finetuned_genetic_mutation_en_5.1.4_3.4_1698224837394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_finetuned_genetic_mutation_en_5.1.4_3.4_1698224837394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("biobert_finetuned_genetic_mutation","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("biobert_finetuned_genetic_mutation","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_finetuned_genetic_mutation| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.3 MB| + +## References + +https://huggingface.co/wahdan99/biobert-finetuned-genetic-mutation \ No newline at end of file From 961311db68e21d2757519ac3d2f1fdd42d3595ff Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:26:09 +0700 Subject: [PATCH 231/301] Add model 2023-10-25-bert_sequence_classifier_paraphrase_pt --- ...-bert_sequence_classifier_paraphrase_pt.md | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_paraphrase_pt.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_paraphrase_pt.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_paraphrase_pt.md new file mode 100644 index 00000000000000..c5ff6d7364ff75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sequence_classifier_paraphrase_pt.md @@ -0,0 +1,100 @@ +--- +layout: model +title: Portuguese BertForSequenceClassification Cased model (from Prompsit) +author: John Snow Labs +name: bert_sequence_classifier_paraphrase +date: 2023-10-25 +tags: [pt, open_source, bert, sequence_classification, ner, onnx] +task: Text Classification +language: pt +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `paraphrase-bert-pt` is a Portuguese model originally trained by `Prompsit`. 
+ +## Predicted Entities + +`Not Paraphrase`, `Paraphrase` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_paraphrase_pt_5.1.4_3.4_1698232661863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_paraphrase_pt_5.1.4_3.4_1698232661863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_paraphrase","pt") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sequence_classifier_paraphrase","pt") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_paraphrase| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|pt| +|Size:|408.2 MB| +|Case sensitive:|true| +|Max sentence length:|128| + +## References + +References + +- https://huggingface.co/Prompsit/paraphrase-bert-pt \ No newline at end of file From 60ac69e0353ba7fe973a1d343a49d03134090f68 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:27:09 +0700 Subject: [PATCH 232/301] Add model 2023-10-25-bert_sim_doc_en --- .../2023-10-25-bert_sim_doc_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sim_doc_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sim_doc_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sim_doc_en.md new file mode 100644 index 00000000000000..b597749fd4d9d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sim_doc_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_sim_doc BertForSequenceClassification from LilaBoualili +author: John Snow Labs +name: bert_sim_doc +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sim_doc` is an English model originally trained by LilaBoualili. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sim_doc_en_5.1.4_3.4_1698221193643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sim_doc_en_5.1.4_3.4_1698221193643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sim_doc","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sim_doc","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sim_doc| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/LilaBoualili/bert-sim-doc \ No newline at end of file From cf861ed689e442b0f6b3e90110f2397417101459 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:28:09 +0700 Subject: [PATCH 233/301] Add model 2023-10-25-goog_bert_ft_cola_70_en --- .../2023-10-25-goog_bert_ft_cola_70_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_70_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_70_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_70_en.md new file mode 100644 index 00000000000000..b609c5b69696b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_70_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_70 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_70 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_70` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_70_en_5.1.4_3.4_1698233231183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_70_en_5.1.4_3.4_1698233231183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_70","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_70","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_70| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-70 \ No newline at end of file From fdbe659f09f8c32f5773dcb111780d7d05dfc0df Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:29:09 +0700 Subject: [PATCH 234/301] Add model 2023-10-25-goog_bert_ft_cola_42_en --- .../2023-10-25-goog_bert_ft_cola_42_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_42_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_42_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_42_en.md new file mode 100644 index 00000000000000..208db8035d1ad9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_42_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_42 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_42 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_42` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_42_en_5.1.4_3.4_1698221947943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_42_en_5.1.4_3.4_1698221947943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_42","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_42","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_42| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-42 \ No newline at end of file From cc143860d9de0b04e2d51cf168ca9320f7709cc0 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:30:10 +0700 Subject: [PATCH 235/301] Add model 2023-10-25-bert_finetuning_test_itcastai_en --- ...-10-25-bert_finetuning_test_itcastai_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_itcastai_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_itcastai_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_itcastai_en.md new file mode 100644 index 00000000000000..f0e2e41f58598e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_finetuning_test_itcastai_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_finetuning_test_itcastai BertForSequenceClassification from ItcastAI +author: John Snow Labs +name: bert_finetuning_test_itcastai +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_itcastai` is an English model originally trained by ItcastAI. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_itcastai_en_5.1.4_3.4_1698211632257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_itcastai_en_5.1.4_3.4_1698211632257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuning_test_itcastai","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_finetuning_test_itcastai","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_itcastai| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/ItcastAI/bert_finetuning_test \ No newline at end of file From 2f0e061bd1e77988752014b84ce7ebb73c10606b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:31:10 +0700 Subject: [PATCH 236/301] Add model 2023-10-25-bert_ft_cola_98_en --- .../2023-10-25-bert_ft_cola_98_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_98_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_98_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_98_en.md new file mode 100644 index 00000000000000..66a9f195cc2db1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_98_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_98 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_98 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_98` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_98_en_5.1.4_3.4_1698233424850.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_98_en_5.1.4_3.4_1698233424850.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_98","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_98","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_98| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-98 \ No newline at end of file From acf2b4c93de542334c5f2bfd01132202030276c9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:32:10 +0700 Subject: [PATCH 237/301] Add model 2023-10-25-goog_bert_ft_cola_18_en --- .../2023-10-25-goog_bert_ft_cola_18_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_18_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_18_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_18_en.md new file mode 100644 index 00000000000000..24ed3f394e1b7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_18_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_18 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_18 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_18` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_18_en_5.1.4_3.4_1698216649878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_18_en_5.1.4_3.4_1698216649878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_18","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_18","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_18| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-18 \ No newline at end of file From 11183a8c50e578e9b84b1c0d0eeb146bfefb2941 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:33:11 +0700 Subject: [PATCH 238/301] Add model 2023-10-25-baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01_en --- ...se_cased_epoch3_batch4_lr2e_05_w0_01_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01_en.md b/docs/_posts/ahmedlone127/2023-10-25-baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01_en.md new file mode 100644 index 00000000000000..4df20a4bbda04e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01 BertForSequenceClassification from JerryYanJiang +author: John Snow Labs +name: baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01` 
is an English model originally trained by JerryYanJiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01_en_5.1.4_3.4_1698214827245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01_en_5.1.4_3.4_1698214827245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|baseline_bert_base_cased_epoch3_batch4_lr2e_05_w0_01| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/JerryYanJiang/baseline_bert-base-cased_epoch3_batch4_lr2e-05_w0.01 \ No newline at end of file From 915b80afe67bddb6cae1439fec4f8abb60a9a923 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:34:11 +0700 Subject: [PATCH 239/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4_en --- ..._morning_news_market_overview_ssec_4_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4_en.md new file mode 100644 index 00000000000000..2798c1896a7de9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification 
model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4_en_5.1.4_3.4_1698233527930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4_en_5.1.4_3.4_1698233527930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +    .setInputCol("text")\ +    .setOutputCol("document") + +tokenizer = Tokenizer()\ +    .setInputCols("document")\ +    .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4","en")\ +    .setInputCols(["document","token"])\ +    .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("document") + +val tokenizer = new Tokenizer() +    .setInputCols("document") +    .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4","en") +    .setInputCols(Array("document","token")) +    .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_4| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-4 \ No newline at end of file From e6f09c66eff361a88e732c913dddea0ed7a93348 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:35:11 +0700 Subject: [PATCH 240/301] Add model 2023-10-25-bert_ft_qqp_99_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_99_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_99_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_99_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_99_jeevesh8_en.md new file mode 100644 index 00000000000000..cf663bca885066 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_99_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_99_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_99_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: +  type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_99_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_99_jeevesh8_en_5.1.4_3.4_1698223118980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_99_jeevesh8_en_5.1.4_3.4_1698223118980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +    .setInputCol("text")\ +    .setOutputCol("document") + +tokenizer = Tokenizer()\ +    .setInputCols("document")\ +    .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_99_jeevesh8","en")\ +    .setInputCols(["document","token"])\ +    .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("document") + +val tokenizer = new Tokenizer() +    .setInputCols("document") +    .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_99_jeevesh8","en") +    .setInputCols(Array("document","token")) +    .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_99_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-99 \ No newline at end of file From 179af2c1b71484ecf2155afeda6e10a4d703cfe5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:36:11 +0700 Subject: [PATCH 241/301] Add model 2023-10-25-bert_classifier_dehate_mono_indonesian_en --- ...rt_classifier_dehate_mono_indonesian_en.md | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_dehate_mono_indonesian_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_dehate_mono_indonesian_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_dehate_mono_indonesian_en.md new file mode 100644 index 00000000000000..3abbbab1c37120 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_dehate_mono_indonesian_en.md @@ -0,0 +1,108 @@ +--- +layout: model +title: English BertForSequenceClassification Cased model (from Hate-speech-CNERG) +author: John Snow Labs +name: bert_classifier_dehate_mono_indonesian +date: 2023-10-25 +tags: [bert, sequence_classification, classification, open_source, en, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dehatebert-mono-indonesian` is a English model originally trained by `Hate-speech-CNERG`. 
+ +## Predicted Entities + +`HATE`, `NON_HATE` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_dehate_mono_indonesian_en_5.1.4_3.4_1698207541153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_dehate_mono_indonesian_en_5.1.4_3.4_1698207541153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_dehate_mono_indonesian","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer,sequenceClassifier_loaded]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_dehate_mono_indonesian","en") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer,sequenceClassifier_loaded)) + +val data = Seq("PUT YOUR STRING HERE").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.classify.bert.hate.mono_indonesian.by_hate_speech_cnerg").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_dehate_mono_indonesian| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|627.7 MB| +|Case sensitive:|true| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-indonesian +- https://github.com/punyajoy/DE-LIMIT +- https://arxiv.org/abs/2004.06465 \ No newline at end of file From 348901aa5d3670810b523885c5cbcb1dbd618014 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:37:11 +0700 Subject: [PATCH 242/301] Add model 2023-10-25-bert_italian_hate_speech_en --- .../2023-10-25-bert_italian_hate_speech_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_italian_hate_speech_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_italian_hate_speech_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_hate_speech_en.md new file mode 100644 index 00000000000000..4ecf9e8b7aa01c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_italian_hate_speech_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_italian_hate_speech BertForSequenceClassification from pysentimiento +author: John Snow Labs +name: bert_italian_hate_speech +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_italian_hate_speech` is a English model originally trained by 
pysentimiento. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_italian_hate_speech_en_5.1.4_3.4_1698222151141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_italian_hate_speech_en_5.1.4_3.4_1698222151141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +    .setInputCol("text")\ +    .setOutputCol("document") + +tokenizer = Tokenizer()\ +    .setInputCols("document")\ +    .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_hate_speech","en")\ +    .setInputCols(["document","token"])\ +    .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("document") + +val tokenizer = new Tokenizer() +    .setInputCols("document") +    .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_italian_hate_speech","en") +    .setInputCols(Array("document","token")) +    .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_italian_hate_speech| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/pysentimiento/bert-it-hate-speech \ No newline at end of file From c0d15f9d87f7aa18f60cf97445b43a1dcd264e84 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:38:11 +0700 Subject: [PATCH 243/301] Add model 2023-10-25-bert_classifier_base_uncased_qnli_en --- ...25-bert_classifier_base_uncased_qnli_en.md | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_base_uncased_qnli_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_base_uncased_qnli_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_base_uncased_qnli_en.md new file mode 100644 index 00000000000000..97cfc0ea4ffd8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_base_uncased_qnli_en.md @@ -0,0 +1,107 @@ +--- +layout: model +title: English BertForSequenceClassification Base Uncased model (from Li) +author: John Snow Labs +name: bert_classifier_base_uncased_qnli +date: 2023-10-25 +tags: [bert, sequence_classification, classification, open_source, en, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-base-uncased-qnli` is a English model originally trained by `Li`. 
+ +## Predicted Entities + +`entailment`, `not_entailment` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_base_uncased_qnli_en_5.1.4_3.4_1698220628861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_base_uncased_qnli_en_5.1.4_3.4_1698220628861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_base_uncased_qnli","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer,sequenceClassifier_loaded]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_base_uncased_qnli","en") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer,sequenceClassifier_loaded)) + +val data = Seq("PUT YOUR STRING HERE").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.classify.bert.uncased_base.by_li").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_base_uncased_qnli| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| +|Case sensitive:|false| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/Li/bert-base-uncased-qnli +- https://paperswithcode.com/dataset/qnli \ No newline at end of file From 1a5014c36412fa957efeeb660c67c9cfba1d0de7 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:39:12 +0700 Subject: [PATCH 244/301] Add model 2023-10-25-goog_bert_ft_cola_21_en --- .../2023-10-25-goog_bert_ft_cola_21_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_21_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_21_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_21_en.md new file mode 100644 index 00000000000000..f3a087bdcfa5b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_21_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_21 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_21 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_21` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_21_en_5.1.4_3.4_1698217595519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_21_en_5.1.4_3.4_1698217595519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +    .setInputCol("text")\ +    .setOutputCol("document") + +tokenizer = Tokenizer()\ +    .setInputCols("document")\ +    .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_21","en")\ +    .setInputCols(["document","token"])\ +    .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("document") + +val tokenizer = new Tokenizer() +    .setInputCols("document") +    .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_21","en") +    .setInputCols(Array("document","token")) +    .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_21| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-21 \ No newline at end of file From f8248c5cdf1641f264b435b7e1e7032e7edecd46 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:40:12 +0700 Subject: [PATCH 245/301] Add model 2023-10-25-goog_bert_ft_cola_31_en --- .../2023-10-25-goog_bert_ft_cola_31_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_31_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_31_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_31_en.md new file mode 100644 index 00000000000000..9164a916b483c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_31_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_31 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_31 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_31` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_31_en_5.1.4_3.4_1698218399794.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_31_en_5.1.4_3.4_1698218399794.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +    .setInputCol("text")\ +    .setOutputCol("document") + +tokenizer = Tokenizer()\ +    .setInputCols("document")\ +    .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_31","en")\ +    .setInputCols(["document","token"])\ +    .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("document") + +val tokenizer = new Tokenizer() +    .setInputCols("document") +    .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_31","en") +    .setInputCols(Array("document","token")) +    .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_31| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-31 \ No newline at end of file From 995cf5dd77f939608af72e42d2efa2e686244a99 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:41:12 +0700 Subject: [PATCH 246/301] Add model 2023-10-25-goog_bert_ft_cola_67_en --- .../2023-10-25-goog_bert_ft_cola_67_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_67_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_67_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_67_en.md new file mode 100644 index 00000000000000..fa12338d9dd42a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_67_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_67 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_67 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`goog_bert_ft_cola_67` is a English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_67_en_5.1.4_3.4_1698233984646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_67_en_5.1.4_3.4_1698233984646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +    .setInputCol("text")\ +    .setOutputCol("document") + +tokenizer = Tokenizer()\ +    .setInputCols("document")\ +    .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_67","en")\ +    .setInputCols(["document","token"])\ +    .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("document") + +val tokenizer = new Tokenizer() +    .setInputCols("document") +    .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_67","en") +    .setInputCols(Array("document","token")) +    .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_67| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-67 \ No newline at end of file From 76df9ce1c2df688993117a31fd41f02b1b070a9a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:42:12 +0700 Subject: [PATCH 247/301] Add model 2023-10-25-a02_suicide_bert_huggingface_finetune_en --- ...02_suicide_bert_huggingface_finetune_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-a02_suicide_bert_huggingface_finetune_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-a02_suicide_bert_huggingface_finetune_en.md b/docs/_posts/ahmedlone127/2023-10-25-a02_suicide_bert_huggingface_finetune_en.md new file mode 100644 index 00000000000000..257a39a4621e78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-a02_suicide_bert_huggingface_finetune_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English a02_suicide_bert_huggingface_finetune BertForSequenceClassification from Ariffnaz +author: John Snow Labs +name: a02_suicide_bert_huggingface_finetune +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`a02_suicide_bert_huggingface_finetune` is a English model originally trained by Ariffnaz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/a02_suicide_bert_huggingface_finetune_en_5.1.4_3.4_1698233704009.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/a02_suicide_bert_huggingface_finetune_en_5.1.4_3.4_1698233704009.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +    .setInputCol("text")\ +    .setOutputCol("document") + +tokenizer = Tokenizer()\ +    .setInputCols("document")\ +    .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("a02_suicide_bert_huggingface_finetune","en")\ +    .setInputCols(["document","token"])\ +    .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() +    .setInputCol("text") +    .setOutputCol("document") + +val tokenizer = new Tokenizer() +    .setInputCols("document") +    .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("a02_suicide_bert_huggingface_finetune","en") +    .setInputCols(Array("document","token")) +    .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|a02_suicide_bert_huggingface_finetune| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Ariffnaz/a02-suicide-bert-huggingface-finetune \ No newline at end of file From 804c3085d193c46d56292a02d2297fa57faf493e Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:43:12 +0700 Subject: [PATCH 248/301] Add model 2023-10-25-bert_classifier_finbert_en --- .../2023-10-25-bert_classifier_finbert_en.md | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_finbert_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_finbert_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_finbert_en.md new file mode 100644 index 00000000000000..6cfa5cff102591 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_finbert_en.md @@ -0,0 +1,109 @@ +--- +layout: model +title: English BertForSequenceClassification Cased model (from ProsusAI) +author: John Snow Labs +name: bert_classifier_finbert +date: 2023-10-25 +tags: [en, open_source, bert, sequence_classification, classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finbert` is a English model originally trained by `ProsusAI`. 
+ +## Predicted Entities + +`neutral`, `positive`, `negative` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_finbert_en_5.1.4_3.4_1698233630506.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_finbert_en_5.1.4_3.4_1698233630506.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +seq_classifier = BertForSequenceClassification.pretrained("bert_classifier_finbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, seq_classifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val seq_classifier = BertForSequenceClassification.pretrained("bert_classifier_finbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, seq_classifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.classify.bert.by_prosusai").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_finbert| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| +|Case sensitive:|true| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/ProsusAI/finbert +- https://www.researchgate.net/publication/251231107_Good_Debt_or_Bad_Debt_Detecting_Semantic_Orientations_in_Economic_Texts +- https://arxiv.org/abs/1908.10063 +- https://medium.com/prosus-ai-tech-blog/finbert-financial-sentiment-analysis-with-bert-b277a3607101 \ No newline at end of file From 6b754cecb9f2d286187c5cdf2677de5bc7e517c8 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:44:12 +0700 Subject: [PATCH 249/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5_en --- ..._morning_news_market_overview_ssec_5_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5_en.md new file mode 100644 index 00000000000000..f5c416b40d8d47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: 
Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5_en_5.1.4_3.4_1698234159300.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5_en_5.1.4_3.4_1698234159300.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_5| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-5 \ No newline at end of file From 31d67bf4e76c108d51ba26706cda1f34735c34b5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:45:12 +0700 Subject: [PATCH 250/301] Add model 2023-10-25-bert_ft_cola_99_en --- .../2023-10-25-bert_ft_cola_99_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_99_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_99_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_99_en.md new file mode 100644 index 00000000000000..628c3fa60b2503 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_cola_99_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_cola_99 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_cola_99 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_cola_99` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_cola_99_en_5.1.4_3.4_1698234234091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_cola_99_en_5.1.4_3.4_1698234234091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_99","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_cola_99","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_cola_99| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_cola-99 \ No newline at end of file From 70dc44edd8b4ac2ce7e660e027fd40dd7add7cc9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:46:13 +0700 Subject: [PATCH 251/301] Add model 2023-10-25-bert_ft_qqp_82_jeevesh8_en --- .../2023-10-25-bert_ft_qqp_82_jeevesh8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_82_jeevesh8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_82_jeevesh8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_82_jeevesh8_en.md new file mode 100644 index 00000000000000..fed0cb005da5c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_ft_qqp_82_jeevesh8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_ft_qqp_82_jeevesh8 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: bert_ft_qqp_82_jeevesh8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ft_qqp_82_jeevesh8` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_82_jeevesh8_en_5.1.4_3.4_1698219788884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ft_qqp_82_jeevesh8_en_5.1.4_3.4_1698219788884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_82_jeevesh8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_ft_qqp_82_jeevesh8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ft_qqp_82_jeevesh8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/bert_ft_qqp-82 \ No newline at end of file From d973d0d6b4cfadbe4bbcf146a3b8891f1d8f965b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:47:13 +0700 Subject: [PATCH 252/301] Add model 2023-10-25-goog_bert_ft_cola_3_en --- .../2023-10-25-goog_bert_ft_cola_3_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_3_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_3_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_3_en.md new file mode 100644 index 00000000000000..8150098741b8bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_3_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_3 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_3 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_3` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_3_en_5.1.4_3.4_1698215297155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_3_en_5.1.4_3.4_1698215297155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_3","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_3","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_3| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-3 \ No newline at end of file From 494c711e76d68614ede572d2fcc8fc55c865c10a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:48:13 +0700 Subject: [PATCH 253/301] Add model 2023-10-25-bert_base_chinese_finetuned_binary_best_en --- ...t_base_chinese_finetuned_binary_best_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_finetuned_binary_best_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_finetuned_binary_best_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_finetuned_binary_best_en.md new file mode 100644 index 00000000000000..3c21357daf4f37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_finetuned_binary_best_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_finetuned_binary_best BertForSequenceClassification from Raychanan +author: John Snow Labs +name: bert_base_chinese_finetuned_binary_best +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_finetuned_binary_best` is an English model originally trained by Raychanan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_binary_best_en_5.1.4_3.4_1698234417943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_finetuned_binary_best_en_5.1.4_3.4_1698234417943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_finetuned_binary_best","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_finetuned_binary_best","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_finetuned_binary_best| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/Raychanan/bert-base-chinese-FineTuned-Binary-Best \ No newline at end of file From 07f83d42a5992c458cda0988c47156b071d14e96 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:49:13 +0700 Subject: [PATCH 254/301] Add model 2023-10-25-bert_classification_1500samples_en --- ...0-25-bert_classification_1500samples_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classification_1500samples_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classification_1500samples_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classification_1500samples_en.md new file mode 100644 index 00000000000000..96cdd384b00952 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classification_1500samples_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_classification_1500samples BertForSequenceClassification from jayavibhav +author: John Snow Labs +name: bert_classification_1500samples +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_classification_1500samples` is an English model originally trained by jayavibhav. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classification_1500samples_en_5.1.4_3.4_1698221153308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classification_1500samples_en_5.1.4_3.4_1698221153308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_classification_1500samples","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_classification_1500samples","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classification_1500samples| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/jayavibhav/bert-classification-1500samples \ No newline at end of file From be6769e7e99defc782cd6f1b8e544200ddbd00c3 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:50:14 +0700 Subject: [PATCH 255/301] Add model 2023-10-25-goog_bert_ft_cola_35_en --- .../2023-10-25-goog_bert_ft_cola_35_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_35_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_35_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_35_en.md new file mode 100644 index 00000000000000..da0bd10c0c1f63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_35_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_35 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_35 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_35` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_35_en_5.1.4_3.4_1698220033273.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_35_en_5.1.4_3.4_1698220033273.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_35","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_35","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_35| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-35 \ No newline at end of file From 792173846c4aa6e567d447e75fb65e36a361db6f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:51:14 +0700 Subject: [PATCH 256/301] Add model 2023-10-25-tiny_bert_sst2_distilled_kushaljoseph_en --- ...iny_bert_sst2_distilled_kushaljoseph_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-tiny_bert_sst2_distilled_kushaljoseph_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-tiny_bert_sst2_distilled_kushaljoseph_en.md b/docs/_posts/ahmedlone127/2023-10-25-tiny_bert_sst2_distilled_kushaljoseph_en.md new file mode 100644 index 00000000000000..720694324591fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-tiny_bert_sst2_distilled_kushaljoseph_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English tiny_bert_sst2_distilled_kushaljoseph BertForSequenceClassification from kushaljoseph +author: John Snow Labs +name: tiny_bert_sst2_distilled_kushaljoseph +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_bert_sst2_distilled_kushaljoseph` is an English model originally trained by kushaljoseph. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_bert_sst2_distilled_kushaljoseph_en_5.1.4_3.4_1698234651704.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_bert_sst2_distilled_kushaljoseph_en_5.1.4_3.4_1698234651704.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("tiny_bert_sst2_distilled_kushaljoseph","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("tiny_bert_sst2_distilled_kushaljoseph","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_bert_sst2_distilled_kushaljoseph| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/kushaljoseph/tiny-bert-sst2-distilled \ No newline at end of file From acb253e12bbaa158cfe0ed9113907898e422aa71 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 18:54:54 +0700 Subject: [PATCH 257/301] Add model 2023-10-25-goog_bert_ft_cola_65_en --- .../2023-10-25-goog_bert_ft_cola_65_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_65_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_65_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_65_en.md new file mode 100644 index 00000000000000..3c511abdf76e74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_65_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_65 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_65 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_65` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_65_en_5.1.4_3.4_1698234886083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_65_en_5.1.4_3.4_1698234886083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_65","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_65","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_65| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-65 \ No newline at end of file From e7c45b7133d1d4baed602e1fa5476d14d6a570ff Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:00:38 +0700 Subject: [PATCH 258/301] Add model 2023-10-25-chinese_roberta_wwm_ext_finetuned_binary_en --- ...ese_roberta_wwm_ext_finetuned_binary_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-chinese_roberta_wwm_ext_finetuned_binary_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-chinese_roberta_wwm_ext_finetuned_binary_en.md b/docs/_posts/ahmedlone127/2023-10-25-chinese_roberta_wwm_ext_finetuned_binary_en.md new file mode 100644 index 00000000000000..c4d3f7d814634f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-chinese_roberta_wwm_ext_finetuned_binary_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English chinese_roberta_wwm_ext_finetuned_binary BertForSequenceClassification from Raychanan +author: John Snow Labs +name: chinese_roberta_wwm_ext_finetuned_binary +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chinese_roberta_wwm_ext_finetuned_binary` is an English model originally trained by Raychanan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chinese_roberta_wwm_ext_finetuned_binary_en_5.1.4_3.4_1698235230260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chinese_roberta_wwm_ext_finetuned_binary_en_5.1.4_3.4_1698235230260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("chinese_roberta_wwm_ext_finetuned_binary","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("chinese_roberta_wwm_ext_finetuned_binary","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chinese_roberta_wwm_ext_finetuned_binary| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.2 MB| + +## References + +https://huggingface.co/Raychanan/chinese-roberta-wwm-ext-FineTuned-Binary \ No newline at end of file From 1c151ba1ad847f49c73882e0be52c3edf9c5c533 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:13:25 +0700 Subject: [PATCH 259/301] Add model 2023-10-25-goog_bert_ft_cola_58_en --- .../2023-10-25-goog_bert_ft_cola_58_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_58_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_58_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_58_en.md new file mode 100644 index 00000000000000..d6d1a9450b8f47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_58_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_58 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_58 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_58` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_58_en_5.1.4_3.4_1698235996382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_58_en_5.1.4_3.4_1698235996382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_58","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_58","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_58| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-58 \ No newline at end of file From b4d45eb02ad35737b533e0bb43edd9e1669545af Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:14:25 +0700 Subject: [PATCH 260/301] Add model 2023-10-25-norbert2_sentiment_norec_4_en --- ...023-10-25-norbert2_sentiment_norec_4_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_4_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_4_en.md b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_4_en.md new file mode 100644 index 00000000000000..f7cec3abb9b5ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_4_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English norbert2_sentiment_norec_4 BertForSequenceClassification from NTCAL +author: John Snow Labs +name: norbert2_sentiment_norec_4 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert2_sentiment_norec_4` is an English model originally trained by NTCAL.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_4_en_5.1.4_3.4_1698235996431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_4_en_5.1.4_3.4_1698235996431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_4","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_4","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2_sentiment_norec_4| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/NTCAL/norbert2_sentiment_norec_4 \ No newline at end of file From 07509bbb1a9b81a689f4d574e7d56e3984c40ad5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:28:20 +0700 Subject: [PATCH 261/301] Add model 2023-10-25-bert_classifier_russian_base_srl_ru --- ...-25-bert_classifier_russian_base_srl_ru.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_russian_base_srl_ru.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_russian_base_srl_ru.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_russian_base_srl_ru.md new file mode 100644 index 00000000000000..a77f5d7f698f53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_russian_base_srl_ru.md @@ -0,0 +1,97 @@ +--- +layout: model +title: Russian bert_classifier_russian_base_srl BertForSequenceClassification from Rexhaif +author: John Snow Labs +name: bert_classifier_russian_base_srl +date: 2023-10-25 +tags: [bert, ru, open_source, sequence_classification, onnx] +task: Text Classification +language: ru +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_classifier_russian_base_srl` is a Russian model originally trained by Rexhaif.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_russian_base_srl_ru_5.1.4_3.4_1698236889529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_russian_base_srl_ru_5.1.4_3.4_1698236889529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_classifier_russian_base_srl","ru")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_classifier_russian_base_srl","ru") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_russian_base_srl| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|ru| +|Size:|669.3 MB| + +## References + +https://huggingface.co/Rexhaif/rubert-base-srl \ No newline at end of file From 98e23dc0ee1a98535a3279adc1f264893ac88261 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:29:57 +0700 Subject: [PATCH 262/301] Add model 2023-10-25-goog_bert_ft_cola_59_en --- .../2023-10-25-goog_bert_ft_cola_59_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_59_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_59_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_59_en.md new file mode 100644 index 00000000000000..0fa6ae8ccc1213 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_59_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_59 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_59 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_59` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_59_en_5.1.4_3.4_1698236948806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_59_en_5.1.4_3.4_1698236948806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_59","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_59","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_59| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-59 \ No newline at end of file From 94f9a2efd8547934f73584fdb7e55067e12ef5ec Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:36:15 +0700 Subject: [PATCH 263/301] Add model 2023-10-25-covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera_en --- ...mnli_nli_sts_crossencoder_covid_hera_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera_en.md b/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera_en.md new file mode 100644 index 00000000000000..64d2a043d42408 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera BertForSequenceClassification from liyijing024 +author: John Snow Labs +name: covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark 
NLP. `covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera` is an English model originally trained by liyijing024. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera_en_5.1.4_3.4_1698237356534.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera_en_5.1.4_3.4_1698237356534.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_twitter_bert_v2_mnli_nli_sts_crossencoder_covid_hera| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/liyijing024/covid-twitter-bert-v2-mnli-NLI-STS-CrossEncoder-Covid-HeRA \ No newline at end of file From 4c521c8c7ec31635896afb310e6546cfcbe85804 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:41:43 +0700 Subject: [PATCH 264/301] Add model 2023-10-25-norbert2_sentiment_norec_6_en --- ...023-10-25-norbert2_sentiment_norec_6_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_6_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_6_en.md b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_6_en.md new file mode 100644 index 00000000000000..60212355c661b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_6_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English norbert2_sentiment_norec_6 BertForSequenceClassification from NTCAL +author: John Snow Labs +name: norbert2_sentiment_norec_6 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert2_sentiment_norec_6` is an English model originally trained by NTCAL.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_6_en_5.1.4_3.4_1698237683293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_6_en_5.1.4_3.4_1698237683293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_6","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_6","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2_sentiment_norec_6| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/NTCAL/norbert2_sentiment_norec_6 \ No newline at end of file From cf5a7a6b72c935b29f0270d404c9b2e956d95d49 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:43:43 +0700 Subject: [PATCH 265/301] Add model 2023-10-25-legal_bert_tpb_clause_class_en --- ...23-10-25-legal_bert_tpb_clause_class_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-legal_bert_tpb_clause_class_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-legal_bert_tpb_clause_class_en.md b/docs/_posts/ahmedlone127/2023-10-25-legal_bert_tpb_clause_class_en.md new file mode 100644 index 00000000000000..db9dffa14be5d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-legal_bert_tpb_clause_class_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English legal_bert_tpb_clause_class BertForSequenceClassification from frankkuete +author: John Snow Labs +name: legal_bert_tpb_clause_class +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bert_tpb_clause_class` is an English model originally trained by frankkuete.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bert_tpb_clause_class_en_5.1.4_3.4_1698237815681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bert_tpb_clause_class_en_5.1.4_3.4_1698237815681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("legal_bert_tpb_clause_class","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("legal_bert_tpb_clause_class","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bert_tpb_clause_class| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/frankkuete/legal-bert-tpb-clause-class \ No newline at end of file From 58e88009468a366fd950d782f7e97241d003d7f5 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:44:51 +0700 Subject: [PATCH 266/301] Add model 2023-10-25-goog_bert_ft_cola_61_en --- .../2023-10-25-goog_bert_ft_cola_61_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_61_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_61_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_61_en.md new file mode 100644 index 00000000000000..1b7d5299d6c7dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_61_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_61 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_61 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_61` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_61_en_5.1.4_3.4_1698237883855.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_61_en_5.1.4_3.4_1698237883855.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_61","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_61","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_61| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-61 \ No newline at end of file From 30fb816b970e3453ce5e2a6ebd60d968639d6e95 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 19:58:55 +0700 Subject: [PATCH 267/301] Add model 2023-10-25-twitter_disaster_bert_large_en --- ...23-10-25-twitter_disaster_bert_large_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-twitter_disaster_bert_large_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-twitter_disaster_bert_large_en.md b/docs/_posts/ahmedlone127/2023-10-25-twitter_disaster_bert_large_en.md new file mode 100644 index 00000000000000..54e97698c2eb2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-twitter_disaster_bert_large_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English twitter_disaster_bert_large BertForSequenceClassification from ReynaQuita +author: John Snow Labs +name: twitter_disaster_bert_large +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `twitter_disaster_bert_large` is an English model originally trained by ReynaQuita. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_disaster_bert_large_en_5.1.4_3.4_1698238716080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_disaster_bert_large_en_5.1.4_3.4_1698238716080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("twitter_disaster_bert_large","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("twitter_disaster_bert_large","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_disaster_bert_large| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/ReynaQuita/twitter_disaster_bert_large \ No newline at end of file From 84a57a01ebe43d5a4388c2d6fd620c98a2b9dffa Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:03:55 +0700 Subject: [PATCH 268/301] Add model 2023-10-25-goog_bert_ft_cola_62_en --- .../2023-10-25-goog_bert_ft_cola_62_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_62_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_62_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_62_en.md new file mode 100644 index 00000000000000..6e1ab25b09c2e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_62_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_62 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_62 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_62` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_62_en_5.1.4_3.4_1698239026933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_62_en_5.1.4_3.4_1698239026933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_62","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_62","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_62| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-62 \ No newline at end of file From 650b33a0a2e74787bb14f8039eb5530197aa2ed9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:04:56 +0700 Subject: [PATCH 269/301] Add model 2023-10-25-bert_large_uncased_mnli_ofirzaf_en --- ...0-25-bert_large_uncased_mnli_ofirzaf_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_mnli_ofirzaf_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_mnli_ofirzaf_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_mnli_ofirzaf_en.md new file mode 100644 index 00000000000000..63886fb14ffb4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_large_uncased_mnli_ofirzaf_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_large_uncased_mnli_ofirzaf BertForSequenceClassification from ofirzaf +author: John Snow Labs +name: bert_large_uncased_mnli_ofirzaf +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_mnli_ofirzaf` is an English model originally trained by ofirzaf. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_mnli_ofirzaf_en_5.1.4_3.4_1698239030775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_mnli_ofirzaf_en_5.1.4_3.4_1698239030775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_mnli_ofirzaf","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_large_uncased_mnli_ofirzaf","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_mnli_ofirzaf| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/ofirzaf/bert-large-uncased-mnli \ No newline at end of file From 289c295fbb790de694fadb99b0efe6704e610606 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:09:22 +0700 Subject: [PATCH 270/301] Add model 2023-10-25-norbert2_sentiment_norec_7_en --- ...023-10-25-norbert2_sentiment_norec_7_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_7_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_7_en.md b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_7_en.md new file mode 100644 index 00000000000000..1927f729a20ea6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_7_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English norbert2_sentiment_norec_7 BertForSequenceClassification from NTCAL +author: John Snow Labs +name: norbert2_sentiment_norec_7 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert2_sentiment_norec_7` is an English model originally trained by NTCAL. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_7_en_5.1.4_3.4_1698239342573.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_7_en_5.1.4_3.4_1698239342573.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_7","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_7","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2_sentiment_norec_7| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/NTCAL/norbert2_sentiment_norec_7 \ No newline at end of file From 6afa6c84c56b57f57056ebac568cc2ec3083937a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:17:58 +0700 Subject: [PATCH 271/301] Add model 2023-10-25-bert_classifier_riad_finetuned_mrpc_en --- ...-bert_classifier_riad_finetuned_mrpc_en.md | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_riad_finetuned_mrpc_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_riad_finetuned_mrpc_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_riad_finetuned_mrpc_en.md new file mode 100644 index 00000000000000..a9f8b2152d0c4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_riad_finetuned_mrpc_en.md @@ -0,0 +1,107 @@ +--- +layout: model +title: English BertForSequenceClassification Cased model (from Riad) +author: John Snow Labs +name: bert_classifier_riad_finetuned_mrpc +date: 2023-10-25 +tags: [bert, sequence_classification, classification, open_source, en, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finetuned-bert-mrpc` is an English model originally trained by `Riad`. 
+ +## Predicted Entities + +`equivalent`, `not equivalent` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_riad_finetuned_mrpc_en_5.1.4_3.4_1698239870016.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_riad_finetuned_mrpc_en_5.1.4_3.4_1698239870016.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_riad_finetuned_mrpc","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer,sequenceClassifier_loaded]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_riad_finetuned_mrpc","en") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer,sequenceClassifier_loaded)) + +val data = Seq("PUT YOUR STRING HERE").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.classify.bert.glue.finetuned.by_Riad").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_riad_finetuned_mrpc| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| +|Case sensitive:|true| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/Riad/finetuned-bert-mrpc +- https://paperswithcode.com/sota?task=Text+Classification&dataset=glue \ No newline at end of file From 034307a1ba9afa6e62c342fac80c8d76524be8a6 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:18:58 +0700 Subject: [PATCH 272/301] Add model 2023-10-25-bert_base_cased_finetuned_cola_sreyang_nvidia_en --- ..._cased_finetuned_cola_sreyang_nvidia_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_cased_finetuned_cola_sreyang_nvidia_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_cased_finetuned_cola_sreyang_nvidia_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_cased_finetuned_cola_sreyang_nvidia_en.md new file mode 100644 index 00000000000000..416af5ec23cb6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_cased_finetuned_cola_sreyang_nvidia_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_cased_finetuned_cola_sreyang_nvidia BertForSequenceClassification from SreyanG-NVIDIA +author: John Snow Labs +name: bert_base_cased_finetuned_cola_sreyang_nvidia +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide 
scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_cola_sreyang_nvidia` is an English model originally trained by SreyanG-NVIDIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_cola_sreyang_nvidia_en_5.1.4_3.4_1698239875860.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_cola_sreyang_nvidia_en_5.1.4_3.4_1698239875860.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_cased_finetuned_cola_sreyang_nvidia","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_cased_finetuned_cola_sreyang_nvidia","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_cola_sreyang_nvidia| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/SreyanG-NVIDIA/bert-base-cased-finetuned-cola \ No newline at end of file From 23092d8a121f416ec7136dc4dcd9e6646f9c932b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:19:58 +0700 Subject: [PATCH 273/301] Add model 2023-10-25-bert_base_uncased_research_articles_multilabel_en --- ...uncased_research_articles_multilabel_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_research_articles_multilabel_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_research_articles_multilabel_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_research_articles_multilabel_en.md new file mode 100644 index 00000000000000..410d858526df84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_research_articles_multilabel_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_research_articles_multilabel BertForSequenceClassification from DunnBC22 +author: John Snow Labs +name: bert_base_uncased_research_articles_multilabel +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_research_articles_multilabel` is an English 
model originally trained by DunnBC22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_research_articles_multilabel_en_5.1.4_3.4_1698239982874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_research_articles_multilabel_en_5.1.4_3.4_1698239982874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_research_articles_multilabel","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_research_articles_multilabel","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_research_articles_multilabel| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/DunnBC22/bert-base-uncased-Research_Articles_Multilabel \ No newline at end of file From 758576cb15aef55bc8f571e8d8a781ad40555675 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:20:59 +0700 Subject: [PATCH 274/301] Add model 2023-10-25-goog_bert_ft_cola_66_en --- .../2023-10-25-goog_bert_ft_cola_66_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_66_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_66_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_66_en.md new file mode 100644 index 00000000000000..58193b59a8aa0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_66_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_66 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_66 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_66` is an English model originally trained by Jeevesh8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_66_en_5.1.4_3.4_1698239876018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_66_en_5.1.4_3.4_1698239876018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_66","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_66","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_66| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-66 \ No newline at end of file From 890c585ae5391097cf4d209f5e6041d550b278fe Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:24:17 +0700 Subject: [PATCH 275/301] Add model 2023-10-25-mini_bert_distilled_en --- .../2023-10-25-mini_bert_distilled_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-mini_bert_distilled_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-mini_bert_distilled_en.md b/docs/_posts/ahmedlone127/2023-10-25-mini_bert_distilled_en.md new file mode 100644 index 00000000000000..19b3cb261a694c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-mini_bert_distilled_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English mini_bert_distilled BertForSequenceClassification from moshew +author: John Snow Labs +name: mini_bert_distilled +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mini_bert_distilled` is an English model originally trained by moshew. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mini_bert_distilled_en_5.1.4_3.4_1698240255617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mini_bert_distilled_en_5.1.4_3.4_1698240255617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("mini_bert_distilled","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("mini_bert_distilled","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mini_bert_distilled| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|42.2 MB| + +## References + +https://huggingface.co/moshew/Mini-bert-distilled \ No newline at end of file From 5ae2477611625048cb5b66e6e05787a5fc4e3ae9 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:25:17 +0700 Subject: [PATCH 276/301] Add model 2023-10-25-bert_fom_job_description_assignment_en --- ...-bert_fom_job_description_assignment_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_fom_job_description_assignment_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_fom_job_description_assignment_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_fom_job_description_assignment_en.md new file mode 100644 index 00000000000000..659f75aa08ef65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_fom_job_description_assignment_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_fom_job_description_assignment BertForSequenceClassification from Pazel +author: John Snow Labs +name: bert_fom_job_description_assignment +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_fom_job_description_assignment` is an English model originally trained by Pazel.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fom_job_description_assignment_en_5.1.4_3.4_1698240255635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fom_job_description_assignment_en_5.1.4_3.4_1698240255635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_fom_job_description_assignment","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_fom_job_description_assignment","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fom_job_description_assignment| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|627.7 MB| + +## References + +https://huggingface.co/Pazel/bert-fom-job-description-assignment \ No newline at end of file From adbbb2547e68d9f168b7c57787e0f22878061e65 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:31:57 +0700 Subject: [PATCH 277/301] Add model 2023-10-25-goog_bert_ft_cola_76_en --- .../2023-10-25-goog_bert_ft_cola_76_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_76_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_76_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_76_en.md new file mode 100644 index 00000000000000..0084c059fa22af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_76_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_76 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_76 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_76` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_76_en_5.1.4_3.4_1698240706542.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_76_en_5.1.4_3.4_1698240706542.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_76","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_76","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_76| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-76 \ No newline at end of file From f53384beeca57d02d4c1e0f7965d410a54722179 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:33:31 +0700 Subject: [PATCH 278/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6_en --- ..._morning_news_market_overview_ssec_6_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6_en.md new file mode 100644 index 00000000000000..1267a8354a009b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and
production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6_en_5.1.4_3.4_1698240802038.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6_en_5.1.4_3.4_1698240802038.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_6| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-6 \ No newline at end of file From 78c567264d44479ba54ded9a6ae3ce5bd2d15323 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:37:39 +0700 Subject: [PATCH 279/301] Add model 2023-10-25-finbert_finetuned_fg_single_sentence_news_weighted_en --- ...ned_fg_single_sentence_news_weighted_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-finbert_finetuned_fg_single_sentence_news_weighted_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-finbert_finetuned_fg_single_sentence_news_weighted_en.md b/docs/_posts/ahmedlone127/2023-10-25-finbert_finetuned_fg_single_sentence_news_weighted_en.md new file mode 100644 index 00000000000000..5e6e199c7e098c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-finbert_finetuned_fg_single_sentence_news_weighted_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English finbert_finetuned_fg_single_sentence_news_weighted BertForSequenceClassification from lucaordronneau +author: John Snow Labs +name: finbert_finetuned_fg_single_sentence_news_weighted +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and
production-readiness using Spark NLP. `finbert_finetuned_fg_single_sentence_news_weighted` is an English model originally trained by lucaordronneau. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_finetuned_fg_single_sentence_news_weighted_en_5.1.4_3.4_1698241050659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_finetuned_fg_single_sentence_news_weighted_en_5.1.4_3.4_1698241050659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("finbert_finetuned_fg_single_sentence_news_weighted","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("finbert_finetuned_fg_single_sentence_news_weighted","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_finetuned_fg_single_sentence_news_weighted| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/lucaordronneau/finbert-finetuned-FG-SINGLE_SENTENCE-NEWS-WEIGHTED \ No newline at end of file From 74b1eaade12f6ab5ceeb27c0a10b7463d7789faa Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:42:56 +0700 Subject: [PATCH 280/301] Add model 2023-10-25-bert_cl_cf_1700_en --- .../2023-10-25-bert_cl_cf_1700_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_cl_cf_1700_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_cl_cf_1700_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_cl_cf_1700_en.md new file mode 100644 index 00000000000000..a54ad51438a87a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_cl_cf_1700_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_cl_cf_1700 BertForSequenceClassification from himanshubeniwal +author: John Snow Labs +name: bert_cl_cf_1700 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_cl_cf_1700` is an English model originally trained by himanshubeniwal.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cl_cf_1700_en_5.1.4_3.4_1698241368584.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cl_cf_1700_en_5.1.4_3.4_1698241368584.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_cl_cf_1700","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_cl_cf_1700","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cl_cf_1700| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/himanshubeniwal/bert_cl_cf_1700 \ No newline at end of file From afc8437c3d3ee94f1bc16cbe98442ca0fbbf9c7f Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:47:31 +0700 Subject: [PATCH 281/301] Add model 2023-10-25-goog_bert_ft_cola_77_en --- .../2023-10-25-goog_bert_ft_cola_77_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_77_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_77_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_77_en.md new file mode 100644 index 00000000000000..24e5ce5121f174 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_77_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_77 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_77 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_77` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_77_en_5.1.4_3.4_1698241641782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_77_en_5.1.4_3.4_1698241641782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_77","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_77","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_77| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-77 \ No newline at end of file From 872d036de49d1f6d8f430b44330ad4355879827b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:48:32 +0700 Subject: [PATCH 282/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7_en --- ..._morning_news_market_overview_ssec_7_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7_en.md new file mode 100644 index 00000000000000..20f18fe0923de3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and
production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7_en_5.1.4_3.4_1698241641367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7_en_5.1.4_3.4_1698241641367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_7| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-7 \ No newline at end of file From cfedb292dc89151768dd0955a5588e1cd16a507b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:57:39 +0700 Subject: [PATCH 283/301] Add model 2023-10-25-bert_classifier_prot_bfd_localization_en --- ...ert_classifier_prot_bfd_localization_en.md | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_classifier_prot_bfd_localization_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_prot_bfd_localization_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_prot_bfd_localization_en.md new file mode 100644 index 00000000000000..636458196bfb2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_classifier_prot_bfd_localization_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English BertForSequenceClassification Cased model (from Rostlab) +author: John Snow Labs +name: bert_classifier_prot_bfd_localization +date: 2023-10-25 +tags: [bert, sequence_classification, classification, open_source, en, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `prot_bert_bfd_localization` is an English model originally trained by `Rostlab`.
+ +## Predicted Entities + +`Mitochondrion`, `Plastid`, `Extracellular`, `Golgi.apparatus`, `Lysosome/Vacuole`, `Endoplasmic.reticulum`, `Cell.membrane`, `Cytoplasm`, `Peroxisome`, `Nucleus` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_classifier_prot_bfd_localization_en_5.1.4_3.4_1698242237342.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_classifier_prot_bfd_localization_en_5.1.4_3.4_1698242237342.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_prot_bfd_localization","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("class") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer,sequenceClassifier_loaded]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier_loaded = BertForSequenceClassification.pretrained("bert_classifier_prot_bfd_localization","en") + .setInputCols(Array("document", "token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer,sequenceClassifier_loaded)) + +val data = Seq("PUT YOUR STRING HERE").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.classify.prot_bfd_localization.bert.by_rostlab").predict("""PUT YOUR STRING HERE""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_classifier_prot_bfd_localization| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| +|Case sensitive:|true| +|Max sentence length:|256| + +## References + +References + +- https://huggingface.co/Rostlab/prot_bert_bfd_localization \ No newline at end of file From 3967eba63bbb74c5129e62c2a639d017e8d09957 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 20:59:12 +0700 Subject: [PATCH 284/301] Add model 2023-10-25-bert_cl_g_1700_en --- .../2023-10-25-bert_cl_g_1700_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_cl_g_1700_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_cl_g_1700_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_cl_g_1700_en.md new file mode 100644 index 00000000000000..7aa7dcd944fc42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_cl_g_1700_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_cl_g_1700 BertForSequenceClassification from himanshubeniwal +author: John Snow Labs +name: bert_cl_g_1700 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_cl_g_1700` is an English model originally trained by himanshubeniwal.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cl_g_1700_en_5.1.4_3.4_1698242314807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cl_g_1700_en_5.1.4_3.4_1698242314807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_cl_g_1700","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_cl_g_1700","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cl_g_1700| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/himanshubeniwal/bert_cl_g_1700 \ No newline at end of file From afd6f9f2295b2476b10b08fb2818678cda51d437 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:00:13 +0700 Subject: [PATCH 285/301] Add model 2023-10-25-bert_sentence_classifier_en --- .../2023-10-25-bert_sentence_classifier_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_sentence_classifier_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_sentence_classifier_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_sentence_classifier_en.md new file mode 100644 index 00000000000000..611b1a83e0f076 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_sentence_classifier_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_sentence_classifier BertForSequenceClassification from Paleontolog +author: John Snow Labs +name: bert_sentence_classifier +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_sentence_classifier` is an English model originally trained by Paleontolog.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sentence_classifier_en_5.1.4_3.4_1698242352864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sentence_classifier_en_5.1.4_3.4_1698242352864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_sentence_classifier","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_sentence_classifier","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sentence_classifier| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|667.3 MB| + +## References + +https://huggingface.co/Paleontolog/bert_sentence_classifier \ No newline at end of file From a9bbc43d02fa3bb43180710d9db172d9c93b7cfc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:01:13 +0700 Subject: [PATCH 286/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8_en --- ..._morning_news_market_overview_ssec_8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8_en.md new file mode 100644 index 00000000000000..4ebd1ef10fd525 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide 
scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8_en_5.1.4_3.4_1698242416129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8_en_5.1.4_3.4_1698242416129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-8 \ No newline at end of file From c18d9537825de2c63ae12dd314c9e8274a25febc Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:02:13 +0700 Subject: [PATCH 287/301] Add model 2023-10-25-goog_bert_ft_cola_81_en --- .../2023-10-25-goog_bert_ft_cola_81_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_81_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_81_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_81_en.md new file mode 100644 index 00000000000000..11f590ad3c0e39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_81_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_81 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_81 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_81` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_81_en_5.1.4_3.4_1698242418276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_81_en_5.1.4_3.4_1698242418276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_81","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_81","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_81| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-81 \ No newline at end of file From d96afa95c09f04ed54377818d08c7cfa9b2a5fcf Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:16:15 +0700 Subject: [PATCH 288/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9_en --- ..._morning_news_market_overview_ssec_9_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9_en.md new file mode 100644 index 00000000000000..950fda31680853 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and 
production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9_en_5.1.4_3.4_1698243365653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9_en_5.1.4_3.4_1698243365653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_9| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-9 \ No newline at end of file From 3cbbd25b9941cde2a68d46b3ce7e3d8d8a45792c Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:18:34 +0700 Subject: [PATCH 289/301] Add model 2023-10-25-rubert_rusentitweet_sismetanin_en --- ...10-25-rubert_rusentitweet_sismetanin_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-rubert_rusentitweet_sismetanin_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-rubert_rusentitweet_sismetanin_en.md b/docs/_posts/ahmedlone127/2023-10-25-rubert_rusentitweet_sismetanin_en.md new file mode 100644 index 00000000000000..e9f32b9b16863f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-rubert_rusentitweet_sismetanin_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English rubert_rusentitweet_sismetanin BertForSequenceClassification from sismetanin +author: John Snow Labs +name: rubert_rusentitweet_sismetanin +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_rusentitweet_sismetanin` is an English model originally trained by sismetanin.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_rusentitweet_sismetanin_en_5.1.4_3.4_1698243502522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_rusentitweet_sismetanin_en_5.1.4_3.4_1698243502522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("rubert_rusentitweet_sismetanin","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("rubert_rusentitweet_sismetanin","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_rusentitweet_sismetanin| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|666.5 MB| + +## References + +https://huggingface.co/sismetanin/rubert-rusentitweet \ No newline at end of file From 0f0a5eff8c9f7a30d36d6da9ba66bcc1731e6d58 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:22:46 +0700 Subject: [PATCH 290/301] Add model 2023-10-25-goog_bert_ft_cola_91_en --- .../2023-10-25-goog_bert_ft_cola_91_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_91_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_91_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_91_en.md new file mode 100644 index 00000000000000..f9779e0c363f8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_91_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_91 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_91 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_91` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_91_en_5.1.4_3.4_1698243759493.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_91_en_5.1.4_3.4_1698243759493.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_91","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_91","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_91| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-91 \ No newline at end of file From 686f0fba1019deaee2f8059265de11ba77cb4d97 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:28:28 +0700 Subject: [PATCH 291/301] Add model 2023-10-25-norbert2_sentiment_norec_8_en --- ...023-10-25-norbert2_sentiment_norec_8_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_8_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_8_en.md b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_8_en.md new file mode 100644 index 00000000000000..fe27aa1b1fce53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_8_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English norbert2_sentiment_norec_8 BertForSequenceClassification from NTCAL +author: John Snow Labs +name: norbert2_sentiment_norec_8 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert2_sentiment_norec_8` is an English model originally trained by NTCAL.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_8_en_5.1.4_3.4_1698244059994.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_8_en_5.1.4_3.4_1698244059994.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_8","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_8","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2_sentiment_norec_8| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/NTCAL/norbert2_sentiment_norec_8 \ No newline at end of file From 81114980df0c262c77d806a4210e6bc3a0a8c076 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:33:55 +0700 Subject: [PATCH 292/301] Add model 2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10_en --- ...morning_news_market_overview_ssec_10_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10_en.md new file mode 100644 index 00000000000000..ce8994b8d36642 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10 BertForSequenceClassification from hw2942 +author: John Snow Labs +name: bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide 
scalability and production-readiness using Spark NLP. `bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10` is an English model originally trained by hw2942. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10_en_5.1.4_3.4_1698244428839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10_en_5.1.4_3.4_1698244428839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_wallstreetcn_morning_news_market_overview_ssec_10| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|383.3 MB| + +## References + +https://huggingface.co/hw2942/bert-base-chinese-wallstreetcn-morning-news-market-overview-SSEC-10 \ No newline at end of file From e0a20b16d61d0fc4cfc1e03a7e3a1ae6a0aa478b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:35:34 +0700 Subject: [PATCH 293/301] Add model 2023-10-25-goog_bert_ft_cola_89_en --- .../2023-10-25-goog_bert_ft_cola_89_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_89_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_89_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_89_en.md new file mode 100644 index 00000000000000..104864c31bf932 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_89_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_89 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_89 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_89` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_89_en_5.1.4_3.4_1698244522753.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_89_en_5.1.4_3.4_1698244522753.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_89","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_89","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_89| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-89 \ No newline at end of file From 329d617b3c9f1a75a39796b6acdb4c97bda06c3d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:36:34 +0700 Subject: [PATCH 294/301] Add model 2023-10-25-bert_base_uncased_random_weights_s42_en --- ...bert_base_uncased_random_weights_s42_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_random_weights_s42_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_random_weights_s42_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_random_weights_s42_en.md new file mode 100644 index 00000000000000..7fa0fc6ddb3e1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_random_weights_s42_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_random_weights_s42 BertForSequenceClassification from EhsanAghazadeh +author: John Snow Labs +name: bert_base_uncased_random_weights_s42 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_random_weights_s42` is an English model originally trained by EhsanAghazadeh.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_random_weights_s42_en_5.1.4_3.4_1698244533509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_random_weights_s42_en_5.1.4_3.4_1698244533509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_random_weights_s42","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_random_weights_s42","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_random_weights_s42| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/EhsanAghazadeh/bert-base-uncased-random-weights-S42 \ No newline at end of file From d5e1efb58ee70fd9f22a301cdd4497f3a8a3d635 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:43:51 +0700 Subject: [PATCH 295/301] Add model 2023-10-25-norbert2_sentiment_norec_9_en --- ...023-10-25-norbert2_sentiment_norec_9_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_9_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_9_en.md b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_9_en.md new file mode 100644 index 00000000000000..13708009c12d63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-norbert2_sentiment_norec_9_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English norbert2_sentiment_norec_9 BertForSequenceClassification from NTCAL +author: John Snow Labs +name: norbert2_sentiment_norec_9 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert2_sentiment_norec_9` is an English model originally trained by NTCAL.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_9_en_5.1.4_3.4_1698245023007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_sentiment_norec_9_en_5.1.4_3.4_1698245023007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_9","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("norbert2_sentiment_norec_9","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2_sentiment_norec_9| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|467.4 MB| + +## References + +https://huggingface.co/NTCAL/norbert2_sentiment_norec_9 \ No newline at end of file From 568e0028c66f309a4c4500f26db4742d1374002a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:45:24 +0700 Subject: [PATCH 296/301] Add model 2023-10-25-bert_base_uncased_finetuned_cola_ruizhou_en --- ..._base_uncased_finetuned_cola_ruizhou_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_cola_ruizhou_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_cola_ruizhou_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_cola_ruizhou_en.md new file mode 100644 index 00000000000000..f4c5c411b5d34a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_cola_ruizhou_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_cola_ruizhou BertForSequenceClassification from Ruizhou +author: John Snow Labs +name: bert_base_uncased_finetuned_cola_ruizhou +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_cola_ruizhou` is an English model originally trained by Ruizhou.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_cola_ruizhou_en_5.1.4_3.4_1698245116739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_cola_ruizhou_en_5.1.4_3.4_1698245116739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_cola_ruizhou","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_cola_ruizhou","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_cola_ruizhou| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Ruizhou/bert-base-uncased-finetuned-cola \ No newline at end of file From 0ca30d88677b1dbeb598992ac371034689ea328b Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:46:24 +0700 Subject: [PATCH 297/301] Add model 2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_base_en --- ..._koreanindonli_kornli_with_bert_base_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_base_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_base_en.md b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_base_en.md new file mode 100644 index 00000000000000..c609724838520b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-fine_tuned_koreanindonli_kornli_with_bert_base_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English fine_tuned_koreanindonli_kornli_with_bert_base BertForSequenceClassification from muhammadravi251001 +author: John Snow Labs +name: fine_tuned_koreanindonli_kornli_with_bert_base +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_koreanindonli_kornli_with_bert_base` is an English 
model originally trained by muhammadravi251001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_koreanindonli_kornli_with_bert_base_en_5.1.4_3.4_1698245173848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_koreanindonli_kornli_with_bert_base_en_5.1.4_3.4_1698245173848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_koreanindonli_kornli_with_bert_base","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("fine_tuned_koreanindonli_kornli_with_bert_base","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_koreanindonli_kornli_with_bert_base| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|414.6 MB| + +## References + +https://huggingface.co/muhammadravi251001/fine-tuned-KoreanIndoNLI-KorNLI-with-bert-base \ No newline at end of file From 775c34c78b21be27ef0187842a07b0de912a182a Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:47:25 +0700 Subject: [PATCH 298/301] Add model 2023-10-25-goog_bert_ft_cola_82_en --- .../2023-10-25-goog_bert_ft_cola_82_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_82_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_82_en.md b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_82_en.md new file mode 100644 index 00000000000000..c90ed243a1122c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-goog_bert_ft_cola_82_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English goog_bert_ft_cola_82 BertForSequenceClassification from Jeevesh8 +author: John Snow Labs +name: goog_bert_ft_cola_82 +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `goog_bert_ft_cola_82` is an English model originally trained by Jeevesh8.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_82_en_5.1.4_3.4_1698245210556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/goog_bert_ft_cola_82_en_5.1.4_3.4_1698245210556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_82","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("goog_bert_ft_cola_82","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|goog_bert_ft_cola_82| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Jeevesh8/goog_bert_ft_cola-82 \ No newline at end of file From 247b4c3e348eb137e5526916f17b301d0fedfc4d Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:48:25 +0700 Subject: [PATCH 299/301] Add model 2023-10-25-finbert_fls_en --- .../ahmedlone127/2023-10-25-finbert_fls_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-finbert_fls_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-finbert_fls_en.md b/docs/_posts/ahmedlone127/2023-10-25-finbert_fls_en.md new file mode 100644 index 00000000000000..db3949ce9f2f7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-finbert_fls_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English finbert_fls BertForSequenceClassification from yiyanghkust +author: John Snow Labs +name: finbert_fls +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finbert_fls` is an English model originally trained by yiyanghkust.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_fls_en_5.1.4_3.4_1698245276476.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_fls_en_5.1.4_3.4_1698245276476.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("finbert_fls","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("finbert_fls","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_fls| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.6 MB| + +## References + +https://huggingface.co/yiyanghkust/finbert-fls \ No newline at end of file From 541879935746cff6e866d85466a837ca73b578eb Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:58:38 +0700 Subject: [PATCH 300/301] Add model 2023-10-25-bert_base_uncased_finetuned_mrpc_ruizhou_en --- ..._base_uncased_finetuned_mrpc_ruizhou_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_ruizhou_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_ruizhou_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_ruizhou_en.md new file mode 100644 index 00000000000000..3ffc3b947b8ed0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_base_uncased_finetuned_mrpc_ruizhou_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_mrpc_ruizhou BertForSequenceClassification from Ruizhou +author: John Snow Labs +name: bert_base_uncased_finetuned_mrpc_ruizhou +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_mrpc_ruizhou` is an English model originally trained by Ruizhou.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_mrpc_ruizhou_en_5.1.4_3.4_1698245908228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_mrpc_ruizhou_en_5.1.4_3.4_1698245908228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_mrpc_ruizhou","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_base_uncased_finetuned_mrpc_ruizhou","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_mrpc_ruizhou| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Ruizhou/bert-base-uncased-finetuned-mrpc \ No newline at end of file From 44a3f8c20823dab5417a828d2740a549628db0e1 Mon Sep 17 00:00:00 2001 From: ahmedlone127 Date: Wed, 25 Oct 2023 21:59:39 +0700 Subject: [PATCH 301/301] Add model 2023-10-25-bert_cnn_news_en --- .../2023-10-25-bert_cnn_news_en.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2023-10-25-bert_cnn_news_en.md diff --git a/docs/_posts/ahmedlone127/2023-10-25-bert_cnn_news_en.md b/docs/_posts/ahmedlone127/2023-10-25-bert_cnn_news_en.md new file mode 100644 index 00000000000000..8c7b8108a52125 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-10-25-bert_cnn_news_en.md @@ -0,0 +1,97 @@ +--- +layout: model +title: English bert_cnn_news BertForSequenceClassification from AyoubChLin +author: John Snow Labs +name: bert_cnn_news +date: 2023-10-25 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.4 +spark_version: 3.4 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_cnn_news` is an English model originally trained by AyoubChLin.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cnn_news_en_5.1.4_3.4_1698245908185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cnn_news_en_5.1.4_3.4_1698245908185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = BertForSequenceClassification.pretrained("bert_cnn_news","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = BertForSequenceClassification.pretrained("bert_cnn_news","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cnn_news| +|Compatibility:|Spark NLP 5.1.4+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/AyoubChLin/bert_cnn_news \ No newline at end of file