From c1f73c9c5137a8312120beec1bb65bdbcef479a2 Mon Sep 17 00:00:00 2001
From: DevinTDHa
Date: Sun, 3 Dec 2023 01:38:10 +0700
Subject: [PATCH 1/2] Add model
 2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en

---
 ...hot_classifier_clip_vit_base_patch32_en.md | 149 ++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md

diff --git a/docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md b/docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md
new file mode 100644
index 00000000000000..032cf95258093f
--- /dev/null
+++ b/docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md
@@ -0,0 +1,149 @@
---
layout: model
title: Image Zero Shot Classification with CLIP
author: John Snow Labs
name: zero_shot_classifier_clip_vit_base_patch32
date: 2023-12-02
tags: [classification, image, en, zero_shot, open_source, onnx]
task: Zero-Shot Classification
language: en
edition: Spark NLP 5.2.0
spark_version: 3.4
supported: true
engine: onnx
annotator: CLIPForZeroShotClassification
article_header:
  type: cover
use_language_switcher: "Python-Scala-Java"
---

## Description

CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on pairs of images
and texts. It can classify images without being trained on any hard-coded labels: the
candidate labels are supplied at inference time, which makes the model very flexible. This is
similar to the zero-shot capabilities of the GPT-2 and GPT-3 language models.

This model was imported from Hugging Face Transformers:
https://huggingface.co/openai/clip-vit-base-patch32

## Predicted Entities



{:.btn-box}


[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.2.0_3.4_1701541274927.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.2.0_3.4_1701541274927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}

## How to use



<div class="tabs-box" markdown="1">
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Start a Spark session with Spark NLP (skip if a session is already running)
spark = sparknlp.start()

# Load the example images as a Spark DataFrame, dropping invalid files
imageDF = spark.read \
    .format("image") \
    .option("dropInvalid", value=True) \
    .load("src/test/resources/image/")

# Convert the raw images into the format the annotators expect
imageAssembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

# Candidate labels are supplied at inference time; no fine-tuning is needed
candidateLabels = [
    "a photo of a bird",
    "a photo of a cat",
    "a photo of a dog",
    "a photo of a hen",
    "a photo of a hippo",
    "a photo of a room",
    "a photo of a tractor",
    "a photo of an ostrich",
    "a photo of an ox"]

imageClassifier = CLIPForZeroShotClassification \
    .pretrained() \
    .setInputCols(["image_assembler"]) \
    .setOutputCol("label") \
    .setCandidateLabels(candidateLabels)

pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
pipelineDF = pipeline.fit(imageDF).transform(imageDF)

# Show the file name of each image next to its predicted label
pipelineDF \
    .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "label.result") \
    .show(truncate=False)
```
```scala
import com.johnsnowlabs.nlp.ImageAssembler
import com.johnsnowlabs.nlp.annotator._
import com.johnsnowlabs.nlp.util.io.ResourceHelper
import org.apache.spark.ml.Pipeline

// ResourceHelper provides the active Spark session with Spark NLP
val imageDF = ResourceHelper.spark.read
  .format("image")
  .option("dropInvalid", value = true)
  .load("src/test/resources/image/")

// Convert the raw images into the format the annotators expect
val imageAssembler: ImageAssembler = new ImageAssembler()
  .setInputCol("image")
  .setOutputCol("image_assembler")

// Candidate labels are supplied at inference time; no fine-tuning is needed
val candidateLabels = Array(
  "a photo of a bird",
  "a photo of a cat",
  "a photo of a dog",
  "a photo of a hen",
  "a photo of a hippo",
  "a photo of a room",
  "a photo of a tractor",
  "a photo of an ostrich",
  "a photo of an ox")

val imageClassifier = CLIPForZeroShotClassification
  .pretrained()
  .setInputCols("image_assembler")
  .setOutputCol("label")
  .setCandidateLabels(candidateLabels)

val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
val pipelineDF = pipeline.fit(imageDF).transform(imageDF)

// Show the file name of each image next to its predicted label
pipelineDF
  .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "label.result")
  .show(truncate = false)
```
</div>
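
The candidate labels above work because CLIP embeds both the image and every candidate label into a shared vector space and returns the label whose embedding is most similar to the image's. The toy sketch below illustrates just that scoring step with made-up vectors; it is not the actual model or the Spark NLP implementation.

```python
import numpy as np

# Hypothetical embeddings, made up purely for illustration; in CLIP these
# come from the image encoder and the text encoder respectively.
image_embedding = np.array([0.2, 0.9, 0.1])
label_embeddings = {
    "a photo of a cat": np.array([0.1, 0.8, 0.2]),
    "a photo of a dog": np.array([0.9, 0.1, 0.3]),
}

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine of the angle between two vectors."""
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

# Score each candidate label against the image and keep the best match.
scores = {label: cosine_similarity(image_embedding, vec)
          for label, vec in label_embeddings.items()}
print(max(scores, key=scores.get))  # -> a photo of a cat
```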
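
For quick experiments on individual files, Spark NLP's `LightPipeline` can run the fitted pipeline on a local image without building a DataFrame first. This is a minimal sketch that reuses the `pipeline` and `imageDF` objects from the example above and assumes `LightPipeline.fullAnnotateImage` is available in your Spark NLP version:

```python
from sparknlp.base import LightPipeline

# Wrap the fitted PipelineModel for single-image inference.
light_pipeline = LightPipeline(pipeline.fit(imageDF))

# fullAnnotateImage takes a path (or a list of paths) to local image files.
annotations = light_pipeline.fullAnnotateImage("src/test/resources/image/hippopotamus.JPEG")

# Each entry maps output column names to annotations; the predicted
# candidate label is stored under the "label" column configured above.
for row in annotations:
    print([annotation.result for annotation in row["label"]])
```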

## Results

```bash
+-----------------+-----------------------+
|image_name       |result                 |
+-----------------+-----------------------+
|palace.JPEG      |[a photo of a room]    |
|egyptian_cat.jpeg|[a photo of a cat]     |
|hippopotamus.JPEG|[a photo of a hippo]   |
|hen.JPEG         |[a photo of a hen]     |
|ostrich.JPEG     |[a photo of an ostrich]|
|junco.JPEG       |[a photo of a bird]    |
|bluetick.jpg     |[a photo of a dog]     |
|chihuahua.jpg    |[a photo of a dog]     |
|tractor.JPEG     |[a photo of a tractor] |
|ox.JPEG          |[a photo of an ox]     |
+-----------------+-----------------------+
```

{:.model-param}
## Model Information

{:.table-model}
|---|---|
|Model Name:|zero_shot_classifier_clip_vit_base_patch32|
|Compatibility:|Spark NLP 5.2.0+|
|License:|Open Source|
|Edition:|Official|
|Input Labels:|[image_assembler]|
|Output Labels:|[classification]|
|Language:|en|
|Size:|392.8 MB|
\ No newline at end of file

From 7648935d253886756161688eda5e699a9c87edd7 Mon Sep 17 00:00:00 2001
From: Maziyar Panahi
Date: Fri, 8 Dec 2023 09:35:18 +0100
Subject: [PATCH 2/2] test 3.0 as version to support all spark 3.x

---
 ...12-02-zero_shot_classifier_clip_vit_base_patch32_en.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md b/docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md
index 032cf95258093f..0cd9cc2246eaa6 100644
--- a/docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md
+++ b/docs/_posts/DevinTDHa/2023-12-02-zero_shot_classifier_clip_vit_base_patch32_en.md
@@ -8,7 +8,7 @@ tags: [classification, image, en, zero_shot, open_source, onnx]
 task: Zero-Shot Classification
 language: en
 edition: Spark NLP 5.2.0
-spark_version: 3.4
+spark_version: 3.0
 supported: true
 engine: onnx
 annotator: CLIPForZeroShotClassification
@@ -34,8 +34,8 @@ https://huggingface.co/openai/clip-vit-base-patch32
 {:.btn-box}
 
 
-[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.2.0_3.4_1701541274927.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
-[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.2.0_3.4_1701541274927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.2.0_3.0_1701541274927.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zero_shot_classifier_clip_vit_base_patch32_en_5.2.0_3.0_1701541274927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
 
 ## How to use
@@ -146,4 +146,4 @@ pipeline
 |Input Labels:|[image_assembler]|
 |Output Labels:|[classification]|
 |Language:|en|
-|Size:|392.8 MB|
\ No newline at end of file
+|Size:|392.8 MB|