diff --git a/examples/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb b/examples/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb index 8018fe461824c3..56c1a80d294b41 100644 --- a/examples/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb +++ b/examples/python/transformers/HuggingFace in Spark NLP - ViTForImageClassification.ipynb @@ -8,7 +8,7 @@ "source": [ "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/HuggingFace in Spark NLP - ViTForImageClassification.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/HuggingFace%20in%20Spark%20NLP%20-%20ViTForImageClassification.ipynb)" ] }, { diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ConvNextForImageClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ConvNextForImageClassification.ipynb new file mode 100644 index 00000000000000..86f42aa8ce50fa --- /dev/null +++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ConvNextForImageClassification.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "LjQoSZTMUH_5" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ConvNextForImageClassification.ipynb)\n", + "\n", + "# Import ONNX ConvNextForImageClassification models from HuggingFace 🤗 into Spark NLP 🚀\n", + "\n", + "Let's keep in mind a few things before we start 😊\n", + "\n", + "- ONNX support was introduced in `Spark NLP 5.1.0`, enabling high performance inference for models. Please make sure you have upgraded to the latest Spark NLP release.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "an8-RiT0UH_8" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oCNlrbMWUH_8" + }, + "source": [ + "- Let's install `transformers` package with the `onnx` extension and its dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock `transformers` on version `4.31.0`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "XezgP-k2UH_8", + "outputId": "5d37ce1b-044e-48b8-eadf-74855b4279d9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m45.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m424.7/424.7 kB\u001b[0m \u001b[31m37.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m44.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m38.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m44.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m22.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m44.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m55.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency 
resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", + "google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.32.3 which is incompatible.\n", + "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\n", + "tensorflow 2.15.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q --upgrade \"transformers[onnx]==4.31.0\" optimum" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UqoI5yIUUH_9" + }, + "source": [ + "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n", + "- We'll use the [facebook/convnext-tiny-224](https://huggingface.co/facebook/convnext-tiny-224) model from HuggingFace as an example and export it with the `optimum-cli`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "XwylSoFOUH_9" + }, + "outputs": [], + "source": [ + "MODEL_NAME = \"facebook/convnext-tiny-224\"\n", + "EXPORT_PATH = f\"export_onnx/{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "OTr9oYDwUH_-", + "outputId": "ceb3b6bd-0169-4364-ccb9-c30ec6b47020", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2024-07-20 18:51:04.028266: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-07-20 18:51:04.028337: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-07-20 18:51:04.177094: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-07-20 18:51:06.270486: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "Framework not specified. Using pt to export the model.\n", + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n", + "config.json: 100% 69.6k/69.6k [00:00<00:00, 2.97MB/s]\n", + "pytorch_model.bin: 100% 114M/114M [00:00<00:00, 140MB/s]\n", + "Automatic task detection to image-classification.\n", + "preprocessor_config.json: 100% 266/266 [00:00<00:00, 1.58MB/s]\n", + "Could not find image processor class in the image processor config or the model config. 
Loading based on pattern matching with the model's feature extractor configuration.\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/convnext/feature_extraction_convnext.py:28: FutureWarning: The class ConvNextFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use ConvNextImageProcessor instead.\n", + " warnings.warn(\n", + "Using the export variant default. Available variants are:\n", + " - default: The default ONNX variant.\n", + "Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.\n", + "\n", + "***** Exporting submodel 1/1: ConvNextForImageClassification *****\n", + "Using framework PyTorch: 2.3.1+cu121\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/convnext/modeling_convnext.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if num_channels != self.num_channels:\n", + "Post-processing the exported models...\n", + "Deduplicating shared (tied) weights...\n", + "\n", + "Validating ONNX model export_onnx/facebook/convnext-tiny-224/model.onnx...\n", + "\t-[✓] ONNX model output names match reference model (logits)\n", + "\t- Validating ONNX Model output \"logits\":\n", + "\t\t-[✓] (2, 1000) matches (2, 1000)\n", + "\t\t-[✓] all values close (atol: 1e-05)\n", + "The ONNX export succeeded and the exported model was saved at: export_onnx/facebook/convnext-tiny-224\n" + ] + } + ], + "source": [ + "! optimum-cli export onnx --model {MODEL_NAME} {EXPORT_PATH}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FV4pziK4UH_-" + }, + "source": [ + "We have to move additional model assets into a seperate folder, so that Spark NLP can load it properly." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Ar_o_tJIUH_-", + "outputId": "abd4422b-5c83-4eb8-c2d6-9874222c08a5", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "mv: cannot stat 'export_onnx/facebook/convnext-tiny-224/*.txt': No such file or directory\n" + ] + } + ], + "source": [ + "! mkdir -p {EXPORT_PATH}/assets\n", + "! 
mv -t {EXPORT_PATH}/assets {EXPORT_PATH}/*.json {EXPORT_PATH}/*.txt" + ] + }, + { + "cell_type": "code", + "source": [ + "import json\n", + "\n", + "config = open(f\"{EXPORT_PATH}/assets/config.json\")\n", + "model_data = json.load(config)\n", + "json_data = json.dumps(model_data['id2label'])\n", + "# Let's make sure the id is type int and not string\n", + "new_dict = dict()\n", + "old_dict = json.loads(json_data)\n", + "for k in old_dict:\n", + " v = old_dict[k]\n", + " if type(k) == str:\n", + " k = int(k)\n", + " new_dict[v] = k\n", + "json_data = new_dict\n", + "\n", + "# now we can save the labels.json to our assets directory\n", + "with open(f'{EXPORT_PATH}/assets/labels.json', 'w') as outfile:\n", + " json.dump(json_data, outfile)\n", + " outfile.write('\\n')" + ], + "metadata": { + "id": "xlC0UD_lqrV7" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6GW8l2fUH_-" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "-WYraOCfUH_-", + "outputId": "28e791e3-33f6-4cf6-c98b-134ea866af7f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 111824\n", + "drwxr-xr-x 2 root root 4096 Jul 20 18:51 assets\n", + "-rw-r--r-- 1 root root 114500412 Jul 20 18:51 model.onnx\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "ukZxhGpWUH_-", + "outputId": "f566c890-85be-4fc7-94ec-ba3e175e348c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 108\n", + "-rw-r--r-- 1 root root 69815 Jul 20 18:51 config.json\n", + "-rw-r--r-- 1 root root 29552 Jul 20 18:51 labels.json\n", + "-rw-r--r-- 1 root root 410 Jul 20 18:51 preprocessor_config.json\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pr7NE5DBUH__" + }, + "source": [ + "## Import and Save ConvNextForImageClassification in Spark NLP\n", + "\n", + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script\n", + "- Additionally, we need to upgrade Spark to version 3.4.1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "acU9SZq-UH__" + }, + "outputs": [], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n", + "! pip install -U pyspark==3.4.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yRUJ0CtfUH__" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4kQTKjcWUH__" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1FIOCiZxUH__" + }, + "source": [ + "- Let's use `loadSavedModel` functon in `ConvNextForImageClassification` which allows us to load the ONNX model\n", + "- Most params will be set automatically. 
They can also be set later after loading the model in `ConvNextForImageClassification` during runtime, so don't worry about setting them now (a short example of this follows further below)\n", + "- `loadSavedModel` accepts two params: the first is the path to the exported model, and the second is the SparkSession, i.e. the `spark` variable we previously started via `sparknlp.start()`\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in the Spark NLP 4.2.2 release. Keep in mind that the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3wJClaqyUH__" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "imageClassifier = ConvNextForImageClassification.loadSavedModel(\n", + " EXPORT_PATH,\n", + " spark\n", + " )\\\n", + " .setInputCols([\"image_assembler\"])\\\n", + " .setOutputCol(\"class\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T8cNjLgcUH__" + }, + "source": [ + "- Let's save it on disk so it is easier to move around and can also be used later via the `.load` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zqhebAObUH__" + }, + "outputs": [], + "source": [ + "imageClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yJ-9XXh7UH__" + }, + "source": [ + "Let's clean up stuff we don't need anymore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CiBlRajlUIAA" + }, + "outputs": [], + "source": [ + "!rm -rf {EXPORT_PATH}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ReTnXz5pUIAA" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your ONNX ConvNextForImageClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qRG-oxWnUIAA" + }, + "outputs": [], + "source": [ + "! 
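Following up on the note above that most parameters are set automatically and can still be changed at runtime: a minimal sketch, assuming the `imageClassifier` loaded above; the value `4` is an arbitrary illustrative batch size, not a recommendation.

```python
# Hedged sketch: only the ONNX weights and labels are fixed by loadSavedModel();
# optional parameters such as the batch size can still be adjusted afterwards.
imageClassifier = imageClassifier.setBatchSize(4)  # arbitrary example value; tune for your hardware
```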
ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cxvpC-hSUIAA" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny ConvNextForImageClassification model 😊" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4_jlf5l8UIAA" + }, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/src/test/resources/image/hippopotamus.JPEG\n", + "from IPython.display import Image, display\n", + "display(Image(\"hippopotamus.JPEG\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eglLGKeJUIAA" + }, + "outputs": [], + "source": [ + "document_assembler = ImageAssembler() \\\n", + " .setInputCol(\"image\") \\\n", + " .setOutputCol(\"image_assembler\")\n", + "\n", + "imageClassifier_loaded = ConvNextForImageClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n", + " .setInputCols([\"image_assembler\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " document_assembler,\n", + " imageClassifier_loaded\n", + "])\n", + "\n", + "test_image = spark.read\\\n", + " .format(\"image\")\\\n", + " .option(\"dropInvalid\", value = True)\\\n", + " .load(\"./hippopotamus.JPEG\")\n", + "\n", + "result = pipeline.fit(test_image).transform(test_image)\n", + "\n", + "result.select(\"class.result\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D65GZokYUIAA" + }, + "source": [ + "That's it! You can now go wild and use hundreds of ConvNextForImageClassification models from HuggingFace 🤗 in Spark NLP 🚀\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_SwinForImageClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_SwinForImageClassification.ipynb new file mode 100644 index 00000000000000..82c963630e030c --- /dev/null +++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_SwinForImageClassification.ipynb @@ -0,0 +1,527 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "LjQoSZTMUH_5" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_SwinForImageClassification.ipynb)\n", + "\n", + "# Import ONNX SwinForImageClassification models from HuggingFace 🤗 into Spark NLP 🚀\n", + "\n", + "Let's keep in mind a few things before we start 😊\n", + "\n", + "- ONNX support was introduced in `Spark NLP 5.1.0`, enabling high performance inference for models. 
Please make sure you have upgraded to the latest Spark NLP release.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "an8-RiT0UH_8" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oCNlrbMWUH_8" + }, + "source": [ + "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock `transformers` on version `4.31.0`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "XezgP-k2UH_8", + "outputId": "f1c913f7-882b-4385-9444-4c224649136b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m424.7/424.7 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m25.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", + "google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.32.3 which is incompatible.\n", + "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\n", + "tensorflow 2.15.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q --upgrade \"transformers[onnx]==4.31.0\" optimum" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UqoI5yIUUH_9" + }, + "source": [ + "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n", + "- We'll use the [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) model from HuggingFace as an example and export it with the `optimum-cli`." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "XwylSoFOUH_9" + }, + "outputs": [], + "source": [ + "MODEL_NAME = \"microsoft/swin-tiny-patch4-window7-224\"\n", + "EXPORT_PATH = f\"export_onnx/{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "OTr9oYDwUH_-", + "outputId": "43a1a595-9c36-4bb8-f03e-8399ff682753", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2024-07-22 06:09:29.357638: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-07-22 06:09:29.357702: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-07-22 06:09:29.359695: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-07-22 06:09:31.049256: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "Framework not specified. 
Using pt to export the model.\n", + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n", + "config.json: 100% 71.8k/71.8k [00:00<00:00, 15.9MB/s]\n", + "model.safetensors: 100% 113M/113M [00:00<00:00, 196MB/s] \n", + "Automatic task detection to image-classification.\n", + "preprocessor_config.json: 100% 255/255 [00:00<00:00, 1.45MB/s]\n", + "Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/vit/feature_extraction_vit.py:28: FutureWarning: The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use ViTImageProcessor instead.\n", + " warnings.warn(\n", + "Using the export variant default. Available variants are:\n", + " - default: The default ONNX variant.\n", + "Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.\n", + "\n", + "***** Exporting submodel 1/1: SwinForImageClassification *****\n", + "Using framework PyTorch: 2.3.1+cu121\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:314: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if num_channels != self.num_channels:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:304: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if width % self.patch_size[1] != 0:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:307: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if height % self.patch_size[0] != 0:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:611: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if min(input_resolution) <= self.window_size:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:703: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs!\n", + " was_padded = pad_values[3] > 0 or pad_values[5] > 0\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:704: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if was_padded:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " should_pad = (height % 2 == 1) or (width % 2 == 1)\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:350: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if should_pad:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/swin/modeling_swin.py:614: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " self.window_size = min(input_resolution)\n", + "Post-processing the exported models...\n", + "Deduplicating shared (tied) weights...\n", + "\n", + "Validating ONNX model export_onnx/microsoft/swin-tiny-patch4-window7-224/model.onnx...\n", + "\t-[✓] ONNX model output names match reference model (logits)\n", + "\t- Validating ONNX Model output \"logits\":\n", + "\t\t-[✓] (2, 1000) matches (2, 1000)\n", + "\t\t-[✓] all values close (atol: 1e-05)\n", + "The ONNX export succeeded and the exported model was saved at: export_onnx/microsoft/swin-tiny-patch4-window7-224\n" + ] + } + ], + "source": [ + "! optimum-cli export onnx --model {MODEL_NAME} {EXPORT_PATH}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FV4pziK4UH_-" + }, + "source": [ + "We have to move additional model assets into a seperate folder, so that Spark NLP can load it properly." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "Ar_o_tJIUH_-", + "outputId": "44bdec78-1ed9-4cb3-c194-ad56a1c4afaa", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "mv: cannot stat 'export_onnx/microsoft/swin-tiny-patch4-window7-224/*.txt': No such file or directory\n" + ] + } + ], + "source": [ + "! mkdir -p {EXPORT_PATH}/assets\n", + "! 
mv -t {EXPORT_PATH}/assets {EXPORT_PATH}/*.json {EXPORT_PATH}/*.txt" + ] + }, + { + "cell_type": "code", + "source": [ + "import json\n", + "\n", + "config = open(f\"{EXPORT_PATH}/assets/config.json\")\n", + "model_data = json.load(config)\n", + "json_data = json.dumps(model_data['id2label'])\n", + "# Let's make sure the id is type int and not string\n", + "new_dict = dict()\n", + "old_dict = json.loads(json_data)\n", + "for k in old_dict:\n", + " v = old_dict[k]\n", + " if type(k) == str:\n", + " k = int(k)\n", + " new_dict[v] = k\n", + "json_data = new_dict\n", + "\n", + "# now we can save the labels.json to our assets directory\n", + "with open(f'{EXPORT_PATH}/assets/labels.json', 'w') as outfile:\n", + " json.dump(json_data, outfile)\n", + " outfile.write('\\n')" + ], + "metadata": { + "id": "xlC0UD_lqrV7" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6GW8l2fUH_-" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "-WYraOCfUH_-", + "outputId": "34ddafb6-5ab8-4b09-c9e6-f114c4c31ba0", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 111M\n", + "drwxr-xr-x 2 root root 4.0K Jul 22 06:10 assets\n", + "-rw-r--r-- 1 root root 111M Jul 22 06:10 model.onnx\n" + ] + } + ], + "source": [ + "!ls -lh {EXPORT_PATH}" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "ukZxhGpWUH_-", + "outputId": "cb8e9b97-be36-42e1-e2a1-9e0448234551", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 108\n", + "-rw-r--r-- 1 root root 70027 Jul 22 06:09 config.json\n", + "-rw-r--r-- 1 root root 29552 Jul 22 06:10 labels.json\n", + "-rw-r--r-- 1 root root 390 Jul 22 06:09 preprocessor_config.json\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pr7NE5DBUH__" + }, + "source": [ + "## Import and Save SwinForImageClassification in Spark NLP\n", + "\n", + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script\n", + "- Additionally, we need to upgrade Spark to version 3.4.1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "acU9SZq-UH__" + }, + "outputs": [], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n", + "! pip install -U pyspark==3.4.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yRUJ0CtfUH__" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4kQTKjcWUH__" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1FIOCiZxUH__" + }, + "source": [ + "- Let's use `loadSavedModel` functon in `SwinForImageClassification` which allows us to load the ONNX model\n", + "- Most params will be set automatically. 
They can also be set later after loading the model in `SwinForImageClassification` during runtime, so don't worry about setting them now\n", + "- `loadSavedModel` accepts two params: the first is the path to the exported model, and the second is the SparkSession, i.e. the `spark` variable we previously started via `sparknlp.start()`\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in the Spark NLP 4.2.2 release. Keep in mind that the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3wJClaqyUH__" + }, + "outputs": [], + "source": [ + "from sparknlp.annotator import *\n", + "from sparknlp.base import *\n", + "\n", + "imageClassifier = SwinForImageClassification.loadSavedModel(\n", + " EXPORT_PATH,\n", + " spark\n", + " )\\\n", + " .setInputCols([\"image_assembler\"])\\\n", + " .setOutputCol(\"class\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T8cNjLgcUH__" + }, + "source": [ + "- Let's save it on disk so it is easier to move around and can also be used later via the `.load` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zqhebAObUH__" + }, + "outputs": [], + "source": [ + "imageClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yJ-9XXh7UH__" + }, + "source": [ + "Let's clean up stuff we don't need anymore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CiBlRajlUIAA" + }, + "outputs": [], + "source": [ + "!rm -rf {EXPORT_PATH}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ReTnXz5pUIAA" + }, + "source": [ + "Awesome 😎 !\n", + "\n", + "This is your ONNX SwinForImageClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qRG-oxWnUIAA" + }, + "outputs": [], + "source": [ + "! 
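As a quick check that the `labels.json` written to the `assets` folder was picked up, the loaded annotator's label list can be inspected. This is only a sketch; it assumes `SwinForImageClassification` exposes a `getClasses()` accessor like other Spark NLP classifiers, and the expected count of 1000 reflects the ImageNet-1k labels of this particular checkpoint.

```python
# Hedged sanity check: the labels from assets/labels.json should be visible on the loaded model
# (assumes a getClasses() accessor is available on the annotator).
classes = imageClassifier.getClasses()
print(len(classes))   # expected: 1000 for this ImageNet-1k checkpoint
print(classes[:5])    # peek at a few label strings
```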
ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cxvpC-hSUIAA" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny SwinForImageClassification model 😊" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4_jlf5l8UIAA" + }, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/src/test/resources/image/hippopotamus.JPEG\n", + "from IPython.display import Image, display\n", + "display(Image(\"hippopotamus.JPEG\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eglLGKeJUIAA" + }, + "outputs": [], + "source": [ + "document_assembler = ImageAssembler() \\\n", + " .setInputCol(\"image\") \\\n", + " .setOutputCol(\"image_assembler\")\n", + "\n", + "imageClassifier_loaded = SwinForImageClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n", + " .setInputCols([\"image_assembler\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " document_assembler,\n", + " imageClassifier_loaded\n", + "])\n", + "\n", + "test_image = spark.read\\\n", + " .format(\"image\")\\\n", + " .option(\"dropInvalid\", value = True)\\\n", + " .load(\"./hippopotamus.JPEG\")\n", + "\n", + "result = pipeline.fit(test_image).transform(test_image)\n", + "\n", + "result.select(\"class.result\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D65GZokYUIAA" + }, + "source": [ + "That's it! You can now go wild and use hundreds of SwinForImageClassification models from HuggingFace 🤗 in Spark NLP 🚀\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ViTForImageClassification.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ViTForImageClassification.ipynb new file mode 100644 index 00000000000000..d4649a00de49ec --- /dev/null +++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ViTForImageClassification.ipynb @@ -0,0 +1,508 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "LjQoSZTMUH_5" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_ViTForImageClassification.ipynb)\n", + "\n", + "# Import ONNX ViTForImageClassification models from HuggingFace 🤗 into Spark NLP 🚀\n", + "\n", + "Let's keep in mind a few things before we start 😊\n", + "\n", + "- ONNX support was introduced in `Spark NLP 5.1.0`, enabling high performance inference for models. 
Please make sure you have upgraded to the latest Spark NLP release.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "an8-RiT0UH_8" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oCNlrbMWUH_8" + }, + "source": [ + "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock `transformers` on version `4.31.0`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XezgP-k2UH_8", + "outputId": "5d97c384-fdb4-42f9-9edf-a4036ef8c144", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m424.7/424.7 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m24.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m57.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m46.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m35.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m33.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", + "google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.32.3 which is incompatible.\n", + "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\n", + "tensorflow 2.15.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q --upgrade \"transformers[onnx]==4.31.0\" optimum" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UqoI5yIUUH_9" + }, + "source": [ + "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n", + "- We'll use the [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) model from HuggingFace as an example and export it with the `optimum-cli`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XwylSoFOUH_9" + }, + "outputs": [], + "source": [ + "MODEL_NAME = \"google/vit-base-patch16-224\"\n", + "EXPORT_PATH = f\"export_onnx/{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OTr9oYDwUH_-", + "outputId": "7e8f4245-951d-4765-a668-418786ee2243", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2024-07-19 20:16:57.984885: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-07-19 20:16:57.984951: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-07-19 20:16:57.986892: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-07-19 20:16:59.731231: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "Framework not specified. 
Using pt to export the model.\n", + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n", + "Automatic task detection to image-classification.\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/vit/feature_extraction_vit.py:28: FutureWarning: The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use ViTImageProcessor instead.\n", + " warnings.warn(\n", + "Using the export variant default. Available variants are:\n", + " - default: The default ONNX variant.\n", + "\n", + "***** Exporting submodel 1/1: ViTForImageClassification *****\n", + "Using framework PyTorch: 2.3.1+cu121\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/vit/modeling_vit.py:170: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if num_channels != self.num_channels:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/vit/modeling_vit.py:176: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if height != self.image_size[0] or width != self.image_size[1]:\n", + "Post-processing the exported models...\n", + "Deduplicating shared (tied) weights...\n", + "\n", + "Validating ONNX model export_onnx/google/vit-base-patch16-224/model.onnx...\n", + "\t-[✓] ONNX model output names match reference model (logits)\n", + "\t- Validating ONNX Model output \"logits\":\n", + "\t\t-[✓] (2, 1000) matches (2, 1000)\n", + "\t\t-[✓] all values close (atol: 1e-05)\n", + "The ONNX export succeeded and the exported model was saved at: export_onnx/google/vit-base-patch16-224\n" + ] + } + ], + "source": [ + "! optimum-cli export onnx --model {MODEL_NAME} {EXPORT_PATH}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FV4pziK4UH_-" + }, + "source": [ + "We have to move additional model assets into a seperate folder, so that Spark NLP can load it properly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ar_o_tJIUH_-", + "outputId": "35ba97a3-ab25-413d-f2c1-dcf23dce230f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "mv: cannot stat 'export_onnx/google/vit-base-patch16-224/*.txt': No such file or directory\n" + ] + } + ], + "source": [ + "! mkdir -p {EXPORT_PATH}/assets\n", + "! 
mv -t {EXPORT_PATH}/assets {EXPORT_PATH}/*.json {EXPORT_PATH}/*.txt" + ] + }, + { + "cell_type": "code", + "source": [ + "import json\n", + "\n", + "config = open(f\"{EXPORT_PATH}/assets/config.json\")\n", + "model_data = json.load(config)\n", + "json_data = json.dumps(model_data['id2label'])\n", + "# Let's make sure the id is type int and not string\n", + "new_dict = dict()\n", + "old_dict = json.loads(json_data)\n", + "for k in old_dict:\n", + " v = old_dict[k]\n", + " if type(k) == str:\n", + " k = int(k)\n", + " new_dict[v] = k\n", + "json_data = new_dict\n", + "\n", + "# now we can save the labels.json to our assets directory\n", + "with open(f'{EXPORT_PATH}/assets/labels.json', 'w') as outfile:\n", + " json.dump(json_data, outfile)\n", + " outfile.write('\\n')" + ], + "metadata": { + "id": "xlC0UD_lqrV7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6GW8l2fUH_-" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-WYraOCfUH_-", + "outputId": "7302d1cd-dab2-47ed-ced9-e9f8ea6a2452", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 338428\n", + "drwxr-xr-x 2 root root 4096 Jul 19 20:24 assets\n", + "-rw-r--r-- 1 root root 346543003 Jul 19 20:17 model.onnx\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ukZxhGpWUH_-", + "outputId": "324c928c-c650-4675-b316-2ad58ed2f2c1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 108\n", + "-rw-r--r-- 1 root root 69684 Jul 19 20:20 config.json\n", + "-rw-r--r-- 1 root root 29552 Jul 19 20:24 labels.json\n", + "-rw-r--r-- 1 root root 327 Jul 19 20:17 preprocessor_config.json\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pr7NE5DBUH__" + }, + "source": [ + "## Import and Save ViTForImageClassification in Spark NLP\n", + "\n", + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script\n", + "- Additionally, we need to upgrade Spark to version 3.4.1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "acU9SZq-UH__" + }, + "outputs": [], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n", + "! pip install -U pyspark==3.4.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yRUJ0CtfUH__" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4kQTKjcWUH__" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1FIOCiZxUH__" + }, + "source": [ + "- Let's use `loadSavedModel` functon in `ViTForImageClassification` which allows us to load the ONNX model\n", + "- Most params will be set automatically. 
They can also be set later after loading the model in `ViTForImageClassification` during runtime, so don't worry about setting them now\n",
+        "- `loadSavedModel` accepts two params: the first is the path to the exported model, and the second is the SparkSession, i.e. the `spark` variable we previously started via `sparknlp.start()`\n",
+        "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in the Spark NLP 4.2.2 release. Keep in mind that the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "3wJClaqyUH__"
+      },
+      "outputs": [],
+      "source": [
+        "from sparknlp.annotator import *\n",
+        "from sparknlp.base import *\n",
+        "\n",
+        "# EXPORT_PATH points at the folder created by the optimum-cli export above\n",
+        "imageClassifier = ViTForImageClassification.loadSavedModel(\n",
+        "     EXPORT_PATH,\n",
+        "     spark\n",
+        "     )\\\n",
+        "    .setInputCols([\"image_assembler\"])\\\n",
+        "    .setOutputCol(\"class\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "T8cNjLgcUH__"
+      },
+      "source": [
+        "- Let's save it on disk so it is easier to move around and can also be used later via the `.load` function"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "zqhebAObUH__"
+      },
+      "outputs": [],
+      "source": [
+        "imageClassifier.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yJ-9XXh7UH__"
+      },
+      "source": [
+        "Let's clean up the files we don't need anymore"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "CiBlRajlUIAA"
+      },
+      "outputs": [],
+      "source": [
+        "!rm -rf {EXPORT_PATH}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ReTnXz5pUIAA"
+      },
+      "source": [
+        "Awesome 😎 !\n",
+        "\n",
+        "This is your ONNX ViTForImageClassification model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "qRG-oxWnUIAA"
+      },
+      "outputs": [],
+      "source": [
+        "! 
ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cxvpC-hSUIAA" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny ViTForImageClassification model 😊" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4_jlf5l8UIAA" + }, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/src/test/resources/image/hippopotamus.JPEG\n", + "from IPython.display import Image, display\n", + "display(Image(\"hippopotamus.JPEG\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eglLGKeJUIAA" + }, + "outputs": [], + "source": [ + "document_assembler = ImageAssembler() \\\n", + " .setInputCol(\"image\") \\\n", + " .setOutputCol(\"image_assembler\")\n", + "\n", + "imageClassifier_loaded = ViTForImageClassification.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n", + " .setInputCols([\"image_assembler\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " document_assembler,\n", + " imageClassifier_loaded\n", + "])\n", + "\n", + "test_image = spark.read\\\n", + " .format(\"image\")\\\n", + " .option(\"dropInvalid\", value = True)\\\n", + " .load(\"./hippopotamus.JPEG\")\n", + "\n", + "result = pipeline.fit(test_image).transform(test_image)\n", + "\n", + "result.select(\"class.result\").show(1, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D65GZokYUIAA" + }, + "source": [ + "That's it! You can now go wild and use hundreds of ViTForImageClassification models from HuggingFace 🤗 in Spark NLP 🚀\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_VisionEncoderDecoderForImageCaptioning_.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_VisionEncoderDecoderForImageCaptioning_.ipynb new file mode 100644 index 00000000000000..6dcf46e85fed3d --- /dev/null +++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_VisionEncoderDecoderForImageCaptioning_.ipynb @@ -0,0 +1,669 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "LjQoSZTMUH_5" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_VisionEncoderDecoderForImageCaptioning.ipynb)\n", + "\n", + "# Import ONNX VisionEncoderDecoderForImageCaptioning models from HuggingFace 🤗 into Spark NLP 🚀\n", + "\n", + "Let's keep in mind a few things before we start 😊\n", + "\n", + "- ONNX support was introduced in `Spark NLP 5.1.0`, enabling high performance inference for models. 
Please make sure you have upgraded to the latest Spark NLP release.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "an8-RiT0UH_8" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oCNlrbMWUH_8" + }, + "source": [ + "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock `transformers` on version `4.31.0`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XezgP-k2UH_8", + "outputId": "ee92aff7-7d19-4853-8c89-cb58c1a7d675" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.9/116.9 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m424.7/424.7 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m69.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m27.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.1/316.1 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m50.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", + "gcsfs 2024.6.1 requires fsspec==2024.6.1, but you have fsspec 2024.5.0 which is incompatible.\n", + "google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.32.3 which is incompatible.\n", + "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\n", + "tensorflow 2.15.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q --upgrade \"transformers[onnx]==4.31.0\" optimum" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UqoI5yIUUH_9" + }, + "source": [ + "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n", + "- We'll use the [nlpconnect/vit-gpt2-image-captioning](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning) model from HuggingFace as an example and export it with the `optimum-cli`." 
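If you'd rather run the export from Python instead of the `optimum-cli` shell command used in the cells below, a roughly equivalent sketch is shown here. It assumes the `ORTModelForVision2Seq` class is available in your installed `optimum` version; `MODEL_NAME` and `EXPORT_PATH` mirror the definitions in the next cell.

```python
# Hypothetical programmatic alternative to the `optimum-cli export onnx` call below (a sketch).
from optimum.onnxruntime import ORTModelForVision2Seq
from transformers import AutoTokenizer, AutoImageProcessor

MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"
EXPORT_PATH = f"export_onnx/{MODEL_NAME}"

# export=True converts the PyTorch checkpoint to ONNX (encoder and decoder graphs)
ort_model = ORTModelForVision2Seq.from_pretrained(MODEL_NAME, export=True)
ort_model.save_pretrained(EXPORT_PATH)

# Save the tokenizer and image processor files next to the ONNX graphs
AutoTokenizer.from_pretrained(MODEL_NAME).save_pretrained(EXPORT_PATH)
AutoImageProcessor.from_pretrained(MODEL_NAME).save_pretrained(EXPORT_PATH)
```

The CLI call in the next cell is what this notebook actually uses; the sketch above produces an equivalent folder of ONNX and preprocessing files.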
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "XwylSoFOUH_9" + }, + "outputs": [], + "source": [ + "MODEL_NAME = \"nlpconnect/vit-gpt2-image-captioning\"\n", + "EXPORT_PATH = f\"export_onnx/{MODEL_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OTr9oYDwUH_-", + "outputId": "ff4b9dd8-7198-4373-859d-a9fe7595d201" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2024-07-30 10:43:01.658251: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-07-30 10:43:01.658332: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-07-30 10:43:01.741181: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-07-30 10:43:03.588057: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "Framework not specified. Using pt to export the model.\n", + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n", + "config.json: 100% 4.61k/4.61k [00:00<00:00, 18.4MB/s]\n", + "pytorch_model.bin: 100% 982M/982M [00:10<00:00, 95.9MB/s]\n", + "Automatic task detection to image-to-text-with-past.\n", + "tokenizer_config.json: 100% 241/241 [00:00<00:00, 1.20MB/s]\n", + "vocab.json: 100% 798k/798k [00:00<00:00, 4.53MB/s]\n", + "merges.txt: 100% 456k/456k [00:00<00:00, 2.41MB/s]\n", + "tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 26.2MB/s]\n", + "special_tokens_map.json: 100% 120/120 [00:00<00:00, 510kB/s]\n", + "preprocessor_config.json: 100% 228/228 [00:00<00:00, 1.11MB/s]\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/vit/feature_extraction_vit.py:28: FutureWarning: The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use ViTImageProcessor instead.\n", + " warnings.warn(\n", + "Using the export variant default. Available variants are:\n", + " - default: The default ONNX variant.\n", + "\n", + "***** Exporting submodel 1/3: ViTModel *****\n", + "Using framework PyTorch: 2.3.1+cu121\n", + "Overriding 1 configuration item(s)\n", + "\t- use_cache -> False\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/vit/modeling_vit.py:170: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if num_channels != self.num_channels:\n", + "/usr/local/lib/python3.10/dist-packages/transformers/models/vit/modeling_vit.py:176: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs!\n", + " if height != self.image_size[0] or width != self.image_size[1]:\n", + "\n", + "***** Exporting submodel 2/3: VisionEncoderDecoderModel *****\n", + "Using framework PyTorch: 2.3.1+cu121\n", + "Overriding 1 configuration item(s)\n", + "\t- use_cache -> True\n", + "\n", + "***** Exporting submodel 3/3: VisionEncoderDecoderModel *****\n", + "Using framework PyTorch: 2.3.1+cu121\n", + "Overriding 1 configuration item(s)\n", + "\t- use_cache -> True\n", + "Post-processing the exported models...\n", + "Deduplicating shared (tied) weights...\n", + "Found different candidate ONNX initializers (likely duplicate) for the tied weights:\n", + "\tdecoder.lm_head.weight: {'onnx::MatMul_5246'}\n", + "\tdecoder.transformer.wte.weight: {'decoder.transformer.wte.weight'}\n", + "Removing duplicate initializer onnx::MatMul_5246...\n", + "Found different candidate ONNX initializers (likely duplicate) for the tied weights:\n", + "\tdecoder.lm_head.weight: {'onnx::MatMul_5245'}\n", + "\tdecoder.transformer.wte.weight: {'decoder.transformer.wte.weight'}\n", + "Removing duplicate initializer onnx::MatMul_5245...\n", + "\n", + "Validating ONNX model export_onnx/nlpconnect/vit-gpt2-image-captioning/encoder_model.onnx...\n", + "\t-[✓] ONNX model output names match reference model (last_hidden_state)\n", + "\t- Validating ONNX Model output \"last_hidden_state\":\n", + "\t\t-[✓] (2, 197, 768) matches (2, 197, 768)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\n", + "Validating ONNX model export_onnx/nlpconnect/vit-gpt2-image-captioning/decoder_model_merged.onnx...\n", + "\t-[✓] ONNX model output names match reference model (present.2.value, present.8.key, present.5.key, present.4.value, present.6.value, present.10.value, present.1.value, present.11.value, present.9.key, present.3.key, present.6.key, present.10.key, present.11.key, present.0.value, present.3.value, present.1.key, logits, present.2.key, present.5.value, present.7.key, present.0.key, present.7.value, present.4.key, present.8.value, present.9.value)\n", + "\t- Validating ONNX Model output \"logits\":\n", + "\t\t-[✓] (2, 16, 50257) matches (2, 16, 50257)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.0.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.0.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.1.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.1.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.2.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.2.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.3.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.3.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 
0.001)\n", + "\t- Validating ONNX Model output \"present.4.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.4.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.5.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.5.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.6.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.6.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.7.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.7.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.8.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.8.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.9.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.9.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.10.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.10.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.11.key\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.11.value\":\n", + "\t\t-[✓] (2, 12, 16, 64) matches (2, 12, 16, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\n", + "Validating ONNX model export_onnx/nlpconnect/vit-gpt2-image-captioning/decoder_model_merged.onnx...\n", + "\t-[✓] ONNX model output names match reference model (present.2.value, present.8.key, present.5.key, present.4.value, present.6.value, present.10.value, present.1.value, present.11.value, present.9.key, present.3.key, present.6.key, present.10.key, present.11.key, present.0.value, present.3.value, present.1.key, logits, present.2.key, present.5.value, present.7.key, present.0.key, present.7.value, present.4.key, present.8.value, present.9.value)\n", + "\t- Validating ONNX Model output \"logits\":\n", + "\t\t-[✓] (2, 1, 50257) matches (2, 1, 50257)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.0.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output 
\"present.0.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.1.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.1.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.2.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.2.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.3.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.3.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.4.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.4.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.5.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.5.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.6.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.6.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.7.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.7.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.8.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.8.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.9.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.9.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.10.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.10.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "\t- Validating ONNX Model output \"present.11.key\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + 
"\t- Validating ONNX Model output \"present.11.value\":\n", + "\t\t-[✓] (2, 12, 17, 64) matches (2, 12, 17, 64)\n", + "\t\t-[✓] all values close (atol: 0.001)\n", + "The ONNX export succeeded and the exported model was saved at: export_onnx/nlpconnect/vit-gpt2-image-captioning\n" + ] + } + ], + "source": [ + "! optimum-cli export onnx --model {MODEL_NAME} {EXPORT_PATH}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FV4pziK4UH_-" + }, + "source": [ + "We have to move additional model assets into a seperate folder, so that Spark NLP can load it properly." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Ar_o_tJIUH_-" + }, + "outputs": [], + "source": [ + "! mkdir -p {EXPORT_PATH}/assets\n", + "! mv -t {EXPORT_PATH}/assets {EXPORT_PATH}/*.json {EXPORT_PATH}/*.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6GW8l2fUH_-" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-WYraOCfUH_-", + "outputId": "4203c965-b031-4d4a-9631-2eb1408e5158" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 2133548\n", + "drwxr-xr-x 2 root root 4096 Jul 30 10:45 assets\n", + "-rw-r--r-- 1 root root 615029740 Jul 30 10:45 decoder_model_merged.onnx\n", + "-rw-r--r-- 1 root root 613132137 Jul 30 10:44 decoder_model.onnx\n", + "-rw-r--r-- 1 root root 613129445 Jul 30 10:44 decoder_with_past_model.onnx\n", + "-rw-r--r-- 1 root root 343440610 Jul 30 10:43 encoder_model.onnx\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ukZxhGpWUH_-", + "outputId": "02984684-8d25-43df-fc31-ad6a8d7bcf7c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 3312\n", + "-rw-r--r-- 1 root root 5038 Jul 30 10:43 config.json\n", + "-rw-r--r-- 1 root root 179 Jul 30 10:43 generation_config.json\n", + "-rw-r--r-- 1 root root 456318 Jul 30 10:43 merges.txt\n", + "-rw-r--r-- 1 root root 378 Jul 30 10:43 preprocessor_config.json\n", + "-rw-r--r-- 1 root root 131 Jul 30 10:43 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 234 Jul 30 10:43 tokenizer_config.json\n", + "-rw-r--r-- 1 root root 2107928 Jul 30 10:43 tokenizer.json\n", + "-rw-r--r-- 1 root root 798156 Jul 30 10:43 vocab.json\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pr7NE5DBUH__" + }, + "source": [ + "## Import and Save VisionEncoderDecoderForImageCaptioning in Spark NLP\n", + "\n", + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script\n", + "- Additionally, we need to upgrade Spark to version 3.4.1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "acU9SZq-UH__" + }, + "outputs": [], + "source": [ + "! wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash\n", + "! 
pip install -U pyspark==3.4.1"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yRUJ0CtfUH__"
+      },
+      "source": [
+        "Let's start Spark with Spark NLP included via our simple `start()` function"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "4kQTKjcWUH__"
+      },
+      "outputs": [],
+      "source": [
+        "import sparknlp\n",
+        "\n",
+        "# let's start Spark with Spark NLP\n",
+        "spark = sparknlp.start()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "1FIOCiZxUH__"
+      },
+      "source": [
+        "- Let's use the `loadSavedModel` function in `VisionEncoderDecoderForImageCaptioning`, which allows us to load the ONNX model\n",
+        "- Most params will be set automatically. They can also be set later after loading the model in `VisionEncoderDecoderForImageCaptioning` during runtime, so don't worry about setting them now\n",
+        "- `loadSavedModel` accepts two params: the first is the path to the exported model, and the second is the SparkSession, i.e. the `spark` variable we previously started via `sparknlp.start()`\n",
+        "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in the Spark NLP 4.2.2 release. Keep in mind that the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "3wJClaqyUH__"
+      },
+      "outputs": [],
+      "source": [
+        "from sparknlp.annotator import *\n",
+        "from sparknlp.base import *\n",
+        "\n",
+        "# EXPORT_PATH points at the folder created by the optimum-cli export above\n",
+        "imageCaptioning = VisionEncoderDecoderForImageCaptioning.loadSavedModel(\n",
+        "     EXPORT_PATH,\n",
+        "     spark\n",
+        "     )\\\n",
+        "    .setInputCols([\"image_assembler\"])\\\n",
+        "    .setOutputCol(\"caption\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "T8cNjLgcUH__"
+      },
+      "source": [
+        "- Let's save it on disk so it is easier to move around and can also be used later via the `.load` function"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "zqhebAObUH__"
+      },
+      "outputs": [],
+      "source": [
+        "imageCaptioning.write().overwrite().save(\"./{}_spark_nlp\".format(MODEL_NAME))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yJ-9XXh7UH__"
+      },
+      "source": [
+        "Let's clean up the files we don't need anymore"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "CiBlRajlUIAA"
+      },
+      "outputs": [],
+      "source": [
+        "!rm -rf {EXPORT_PATH}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ReTnXz5pUIAA"
+      },
+      "source": [
+        "Awesome 😎 !\n",
+        "\n",
+        "This is your ONNX VisionEncoderDecoderForImageCaptioning model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "qRG-oxWnUIAA"
+      },
+      "outputs": [],
+      "source": [
+        "! 
ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cxvpC-hSUIAA" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny VisionEncoderDecoderForImageCaptioning model 😊" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4_jlf5l8UIAA" + }, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp/master/src/test/resources/image/hippopotamus.JPEG\n", + "from IPython.display import Image, display\n", + "display(Image(\"hippopotamus.JPEG\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eglLGKeJUIAA" + }, + "outputs": [], + "source": [ + "document_assembler = ImageAssembler() \\\n", + " .setInputCol(\"image\") \\\n", + " .setOutputCol(\"image_assembler\")\n", + "\n", + "imageClassifier_loaded = ConvNextForImageClassification\\\n", + " .setInputCols([\"image_assembler\"])\\\n", + " .setOutputCol(\"class\")\n", + "\n", + "imageCaptioning = VisionEncoderDecoderForImageCaptioning.load(\"./{}_spark_nlp\".format(MODEL_NAME))\\\n", + " .setBeamSize(2) \\\n", + " .setDoSample(False) \\\n", + " .setInputCols([\"image_assembler\"]) \\\n", + " .setOutputCol(\"caption\")\n", + "\n", + "pipeline = Pipeline().setStages([\n", + " document_assembler,\n", + " imageCaptioning\n", + "])\n", + "\n", + "test_image = spark.read\\\n", + " .format(\"image\")\\\n", + " .option(\"dropInvalid\", value = True)\\\n", + " .load(\"./hippopotamus.JPEG\")\n", + "\n", + "result = pipeline.fit(test_image).transform(test_image)\n", + "result \\\n", + " .selectExpr(\"reverse(split(image.origin, '/'))[0] as image_name\", \"caption.result\") \\\n", + " .show(truncate = False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D65GZokYUIAA" + }, + "source": [ + "That's it! You can now go wild and use hundreds of VisionEncoderDecoderForImageCaptioning models from HuggingFace 🤗 in Spark NLP 🚀\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_snowflake.ipynb b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_snowflake.ipynb new file mode 100644 index 00000000000000..2fe1cdc0adcfb8 --- /dev/null +++ b/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_snowflake.ipynb @@ -0,0 +1,2747 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "8uaoXS8d-LBQ" + }, + "source": [ + "![JohnSnowLabs](https://sparknlp.org/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/transformers/onnx/HuggingFace_ONNX_in_Spark_NLP_snowflake.ipynb)\n", + "\n", + "# Import ONNX snowflake models from HuggingFace 🤗 into Spark NLP 🚀\n", + "\n", + "Let's keep in mind a few things before we start 😊\n", + "\n", + "- ONNX support was introduced in `Spark NLP 5.0.0`, enabling high performance inference for models. 
Please make sure you have upgraded to the latest Spark NLP release.\n", + "- You can import models for snowflake from HuggingFace and they have to be in `Fill Mask` category. Meaning, you cannot use snowflake models trained/fine-tuned on a specific task such as token/sequence classification." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NkxaTXf3-LBT" + }, + "source": [ + "## Export and Save HuggingFace model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FY9M7_Cs-LBU" + }, + "source": [ + "- Let's install `transformers` package with the `onnx` extension and it's dependencies. You don't need `onnx` to be installed for Spark NLP, however, we need it to load and save models from HuggingFace.\n", + "- We lock `transformers` on version `4.34.1`. This doesn't mean it won't work with the future releases, but we wanted you to know which versions have been tested successfully." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "faBcByOA-LBV", + "outputId": "fb4b2c52-68dd-41cb-b44c-000b0b37b451", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m424.7/424.7 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.7/212.7 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m34.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m455.8/455.8 kB\u001b[0m \u001b[31m27.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m24.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m45.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m35.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.\n", + "google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.32.3 which is incompatible.\n", + "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.\n", + "tensorflow 2.15.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q --upgrade transformers[onnx] optimum" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m5hRR7Xl-LBW" + }, + "source": [ + "- HuggingFace has an extension called Optimum which offers specialized model inference, including ONNX. We can use this to import and export ONNX models with `from_pretrained` and `save_pretrained`.\n", + "- We'll use [Snowflake/snowflake-arctic-embed-m](https://huggingface.co/Snowflake/snowflake-arctic-embed-m) model from HuggingFace as an example and load it as a `ORTModelForFeatureExtraction`, representing an ONNX model.\n", + "- In addition to the snowflake model, we also need to save the `BertTokenizer`. This is the same for every model, these are assets (saved in `/assets`) needed for tokenization inside Spark NLP." 
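Before wiring the export into Spark NLP, it can be handy to sanity-check the ONNX graph directly with `onnxruntime` once the export cell below has run. This is only a sketch: the `model.onnx` filename and the BERT-style input names are assumptions based on a typical Optimum feature-extraction export.

```python
# Optional sanity check (sketch): run the exported graph with onnxruntime directly.
import onnxruntime as ort
from transformers import AutoTokenizer

MODEL_NAME = "Snowflake/snowflake-arctic-embed-m"
EXPORT_PATH = f"onnx_models/{MODEL_NAME}"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
session = ort.InferenceSession(f"{EXPORT_PATH}/model.onnx")

encoded = tokenizer("Spark NLP is an open-source text processing library.", return_tensors="np")
# Only feed the inputs the exported graph actually declares
feed = {inp.name: encoded[inp.name] for inp in session.get_inputs() if inp.name in encoded}

last_hidden_state = session.run(None, feed)[0]
# snowflake-arctic-embed models use the [CLS] token as the sentence embedding
sentence_embedding = last_hidden_state[:, 0, :]
print(sentence_embedding.shape)  # expected: (1, 768)
```

If the shape and values look sane, the same `model.onnx` plus the `assets` folder is everything Spark NLP needs.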
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "NBrJz3Qt-LBX", + "outputId": "b28aca4b-b230-4b59-e0ef-429eb46ef9f3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487, + "referenced_widgets": [ + "01a13838b42c4b74b86688d23f83d0e6", + "ab8d5f78f9f148f6882ea5f8df117b05", + "130f3d7b83c34fcdb57c052db4f380ec", + "e32b85e3d4914b179040727cb0a07c7c", + "c36e66d1c0ec4d3288725f79aab2c804", + "e523b368b4bf4730ab0d1a461b7adb0e", + "ab98619de7e3408a9f996750afc5b710", + "f54d071adef04bc2aeafbe092421de2f", + "00b2642351554eb7ad3d0a18b441370d", + "914f40f0500d4cafa0413cc07e4bd316", + "e4b8ca47ec964020977ffc7e83a6116a", + "01de9036be0c44699be8dc1581ed9677", + "443d8cc16ad24d8c9e47408df83a6c53", + "d7669040f645442781b7d51cb7262201", + "e9333e4b41bf4067885461897bd3c0d9", + "977eb0478f814cb296c41fd5d1a2c97f", + "c49df48d81c0463e95fe32c5225e3464", + "df2cb36d5f514461bd23ee0b10fe177f", + "e7b37140377e415abdda1d54a36c866f", + "1013375302bb40d59aec7e5863923fbd", + "0f67f4199ceb40bd9b933db1206e7122", + "8afec1bc2fe74a1b85709b2cd1970e17", + "7c49fcb0adf24fffacfc78f219ee2458", + "3b5c54406b66456b9385d2e43e604fe9", + "ff793bb70a0c48b38309f1264edc5b9d", + "1c15a50073c94145b76c692f0b07f925", + "87c0ee6209214c18aaaa357346149c14", + "954976c4327a46e0af403fbb9ff36934", + "6b5087b00109475f83dba93784a6ec82", + "5f90c27303e44db88a38b863af98e55f", + "fe8d36df978945248825a71d33b0c841", + "76bac98bd4634405ac1c5c27b4dd0a7b", + "16dc91e1466c4b9eb4dd96a698930a34", + "ca49cc4036424696af4112ba2ac316d3", + "67e095fa699f4a189bede10cf0b48c8d", + "3786a94ac75b41b3843c37b852afcb05", + "b13431eee37e42528dd7e090208cb09b", + "2eb9e953086f4735b28afd4fdce98aeb", + "938226ecebf746f48ea13307b4fe0f5d", + "530793d3faf24f4da6927256ddfeddea", + "cd760140c4f340ec931f83a1464696a8", + "5065495ab87244cd96c99742eff83f2a", + "64abfed62f454f88a5b8cadd0a5bdfdb", + "ca8b10b455b349bd922c7861265dc7a1", + "17d6731b17bc40e2905e578a165ee928", + "6cc1ae19c71c4444bc97a99b6b4d5960", + "c330031d110745558faaa4fa5a1f47f4", + "d4e23f3a28a141c2bc0b559fcb7eee84", + "82c9b4084b8b4cdda57f31ead7a0296e", + "0a863eed28e2448ba134e48de2a12c02", + "5485f261163d423d916099eb0c4e1cd9", + "d3b9645dafb84488bd951788afca600c", + "bf94c73d8dd04bbabcb61da1afc31765", + "23bed1e463e84c42a3c3a7a9e314f8ab", + "cb3d3466c0254682b7eb04cd6bff63e7", + "a4bc7091346943f99a8c3df133d8aaec", + "24b8fd85041f459b856f7a81f4794e1e", + "077b3a45937044099a93762ae19ab486", + "925068f3d6ff42b8876ab2b589a00858", + "4b097fd1ba834560bd8de0537345db83", + "b652a620066f4fa0b1705efcee874386", + "4530c6787c1444ddaf8d55c98c1e539f", + "ab704bfff72b4a4d9819513647f0838e", + "a9bda9ed4e1243d2a914003ff93d71ad", + "3247354c44e343718d6be93cd153ae98", + "89cc2e327672474c924f8835cd7d514c" + ] + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "config.json: 0%| | 0.00/738 [00:00 False\n" + ] + } + ], + 
"source": [ + "from optimum.onnxruntime import ORTModelForFeatureExtraction\n", + "\n", + "MODEL_NAME = \"Snowflake/snowflake-arctic-embed-m\"\n", + "EXPORT_PATH = f\"onnx_models/{MODEL_NAME}\"\n", + "\n", + "ort_model = ORTModelForFeatureExtraction.from_pretrained(MODEL_NAME, export=True, trust_remote_code=True)\n", + "\n", + "# Save the ONNX model\n", + "ort_model.save_pretrained(EXPORT_PATH)\n", + "\n", + "# Create directory for assets and move the tokenizer files.\n", + "# A separate folder is needed for Spark NLP.\n", + "!mkdir {EXPORT_PATH}/assets\n", + "!mv {EXPORT_PATH}/vocab.txt {EXPORT_PATH}/assets/" + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "dHf9NdTsDexb" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7dolgHbD-LBX" + }, + "source": [ + "Let's have a look inside these two directories and see what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "o2wua50w-LBY", + "outputId": "e11554f1-d60b-4ca3-a8eb-7edb650b9876", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 426348\n", + "drwxr-xr-x 2 root root 4096 Jul 18 14:18 assets\n", + "-rw-r--r-- 1 root root 675 Jul 18 14:17 config.json\n", + "-rw-r--r-- 1 root root 435844616 Jul 18 14:18 model.onnx\n", + "-rw-r--r-- 1 root root 695 Jul 18 14:17 special_tokens_map.json\n", + "-rw-r--r-- 1 root root 1381 Jul 18 14:17 tokenizer_config.json\n", + "-rw-r--r-- 1 root root 711649 Jul 18 14:17 tokenizer.json\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "97ScuGul-LBY", + "outputId": "db793013-7c63-484c-e595-c84bea26f0f5", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 228\n", + "-rw-r--r-- 1 root root 231508 Jul 18 14:17 vocab.txt\n" + ] + } + ], + "source": [ + "!ls -l {EXPORT_PATH}/assets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CFLnQ4vm-LBZ" + }, + "source": [ + "## Import and Save snowflake in Spark NLP\n", + "\n", + "- Let's install and setup Spark NLP in Google Colab\n", + "- This part is pretty easy via our simple script" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "dxCEAixU-LBZ", + "outputId": "4ed31fde-fdd2-4a26-c814-1a86ce99f04b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Installing PySpark 3.2.3 and Spark NLP 5.4.1\n", + "setup Colab for PySpark 3.2.3 and Spark NLP 5.4.1\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.5/281.5 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m579.2/579.2 kB\u001b[0m \u001b[31m34.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.7/199.7 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "! 
wget -q http://setup.johnsnowlabs.com/colab.sh -O - | bash" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyeZdo61-LBa" + }, + "source": [ + "Let's start Spark with Spark NLP included via our simple `start()` function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tWzqJOSe-LBb", + "outputId": "7c2b31fb-7789-493c-9b53-bf9a4108d959", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/lib/python3.10/subprocess.py:1796: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = _posixsubprocess.fork_exec(\n" + ] + } + ], + "source": [ + "import sparknlp\n", + "# let's start Spark with Spark NLP\n", + "spark = sparknlp.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5X61x34a-LBb" + }, + "source": [ + "- Let's use `loadSavedModel` functon in `SnowFlakeEmbeddings` which allows us to load the ONNX model\n", + "- Most params will be set automatically. They can also be set later after loading the model in `SnowFlakeEmbeddings` during runtime, so don't worry about setting them now\n", + "- `loadSavedModel` accepts two params, first is the path to the exported model. The second is the SparkSession that is `spark` variable we previously started via `sparknlp.start()`\n", + "- `setStorageRef` is very important. When you are training a task like NER or any Text Classification, we use this reference to bound the trained model to this specific embeddings so you won't load a different embeddings by mistake and see terrible results 😊\n", + "- It's up to you what you put in `setStorageRef` but it cannot be changed later on. We usually use the name of the model to be clear, but you can get creative if you want!\n", + "- The `dimension` param is is purely cosmetic and won't change anything. It's mostly for you to know later via `.getDimension` what is the dimension of your model. So set this accordingly.\n", + "- NOTE: `loadSavedModel` accepts local paths in addition to distributed file systems such as `HDFS`, `S3`, `DBFS`, etc. This feature was introduced in Spark NLP 4.2.2 release. 
Keep in mind that the best and recommended way to move/share/reuse Spark NLP models is to use `write.save` so you can use `.load()` from any file system natively.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ZfRgnm5V-LBc"
+      },
+      "outputs": [],
+      "source": [
+        "from sparknlp.annotator import *\n",
+        "\n",
+        "# All these params should be identical to the original ONNX model\n",
+        "snowflake = SnowFlakeEmbeddings.loadSavedModel(f\"{EXPORT_PATH}\", spark)\\\n",
+        "    .setInputCols([\"document\"])\\\n",
+        "    .setOutputCol(\"snowflake\")\\\n",
+        "    .setCaseSensitive(True)\\\n",
+        "    .setDimension(768)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YklsGumf-LBc"
+      },
+      "source": [
+        "- Let's save it on disk so it is easier to move around and can also be used later via the `.load` function"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "thmPSatB-LBc"
+      },
+      "outputs": [],
+      "source": [
+        "snowflake.write().overwrite().save(f\"{MODEL_NAME}_spark_nlp\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "F9nJj6Fs-LBc"
+      },
+      "source": [
+        "Let's clean up the files we don't need anymore"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "-GbJfqzE-LBc"
+      },
+      "outputs": [],
+      "source": [
+        "!rm -rf {EXPORT_PATH}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "CfhLgj1U-LBd"
+      },
+      "source": [
+        "Awesome 😎 !\n",
+        "\n",
+        "This is your ONNX snowflake model from HuggingFace 🤗 loaded and saved by Spark NLP 🚀"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "9irc4X-h-LBe",
+        "outputId": "e82d0572-9482-4109-ea72-8d924820ea08",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "total 1305800\n",
+            "-rw-r--r-- 1 root root 1337122892 Jul 13 12:32 bert_onnx\n",
+            "drwxr-xr-x 3 root root 4096 Jul 13 12:31 fields\n",
+            "drwxr-xr-x 2 root root 4096 Jul 13 12:31 metadata\n"
+          ]
+        }
+      ],
+      "source": [
+        "! 
ls -l {MODEL_NAME}_spark_nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q6kMLGGM-LBe" + }, + "source": [ + "Now let's see how we can use it on other machines, clusters, or any place you wish to use your new and shiny snowflake model 😊" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EuxOV23j-LBf" + }, + "outputs": [], + "source": [ + "import sparknlp\n", + "\n", + "from sparknlp.base import *\n", + "from sparknlp.annotator import *\n", + "\n", + "document_assembler = DocumentAssembler()\\\n", + " .setInputCol(\"text\")\\\n", + " .setOutputCol(\"document\")\n", + "\n", + "snowflake_loaded = SnowFlakeEmbeddings.load(f\"{MODEL_NAME}_spark_nlp\")\\\n", + " .setInputCols([\"document\"])\\\n", + " .setOutputCol(\"snowflake\")\\\n", + "\n", + "pipeline = Pipeline(\n", + " stages = [\n", + " document_assembler,\n", + " snowflake_loaded\n", + " ])\n", + "\n", + "data = spark.createDataFrame([['William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor,and philanthropist.']]).toDF(\"text\")\n", + "model = pipeline.fit(data)\n", + "result = model.transform(data)" + ] + }, + { + "cell_type": "code", + "source": [ + "data = spark.createDataFrame([['my name is ahmed']]).toDF(\"text\")\n", + "result = model.transform(data)" + ], + "metadata": { + "id": "d3LjIpizF06G" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ayJxQu9P-LBf", + "outputId": "985a3153-fea1-4b90-8f79-6e350ca5f1c1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------+\n",
+        "|embeddings                                                                                       |\n",
+        "+-------------------------------------------------------------------------------------------------+\n",
+        "|[-0.34382302, -0.7639188, -0.29532853, -0.2195783, 0.2455769, -0.4229508, -0.36216187, ...]     |\n",
+        "|[-0.54753375, -0.73252505, -0.34126648, -0.24534917, -0.22225188, -0.34929994, -0.33504894, ...]|\n",
+        "|[-0.4551709, -0.70163435, -0.6035281, -0.3169913, 0.025437132, -0.2306121, -0.20132802, ...]    |\n",
+        "|[-0.35897657, -0.59681326, -0.55103576, -0.017464325, 0.09312774, -0.2713198, -0.13424516, 
-1.4870961, 0.8415622, 0.9397263, 0.16592272, -0.023334943, -0.23283626, -0.28385338, -0.001450099, -0.6735244, 0.47661752, 0.5322372, -0.26379704, -0.3593277, -0.82683843, 0.41019377, -0.6221298, 0.4880636, 0.30322093, -0.07928008, -0.12791961, 0.6982516, -0.17000294, 0.39764965, 0.46489537, -0.016622137, -0.21562736, 0.66359895, 0.8911648, -0.72263134, -0.2241365, 0.23437786, 0.024908833, -0.27102163, 0.41851312, -0.5933987, 0.359057, -0.47903562, -0.19923219, 0.14513351, 0.9444317, -0.36339658, -0.47624993, -0.10749459, 0.040384546, -0.44108003, -0.036803693, 0.3097427, 0.43607265, 0.2121554, 0.140568, -0.59498185, -0.48031226, -0.11195163, -0.78449756, -0.05053182, 0.3151122, -0.6420899, -0.54836386, -0.5454042, 0.0691715, 0.19924527, -0.33220154, -0.25046748, 0.614781, 0.81399196, -0.037820578, 0.042210273, -0.46795863, 0.37163508, 0.13784549, 0.48491338, 0.03872712, 0.17956483, 0.9661279, -0.7149761, 0.34027272, -1.0228583, 0.4252489, 0.31149942, -0.95872957, -0.43324682, -0.87626946, 0.0023450684, -0.7944293, 0.2034812, 0.5362983, 0.5770597, -0.15400304, -0.48644653, -0.221178, -0.69166416, -0.52299905, -0.65276134, 0.5411688, -0.41724434, 0.3850983, 0.55748796, -0.39397466, 4.2382865, 0.23195827, 0.06872882, -0.22397324, 0.4375539, 0.839061, 0.31089967, -0.31954578, 0.006180516, -1.3736545, 0.46117347, -0.824713, 0.23854768, -0.12416801, 0.10402848, 0.26419544, -0.9571657, -0.44649768, 0.09250796, -0.75244206, -0.8610541, 0.40063858, 0.04109349, 0.36432105, 0.05986332, -0.23050436, 1.0522761, -1.3608738, 0.3848923, -0.043846734, 0.70622057, 4.386697E-4, 0.4012955, -1.1308628, -0.091340944, 1.2571012, -0.12098083, 0.07884159, 0.17039572, -0.3461215, -0.27642882, 0.08751892, 0.6390081, -0.77167684, 0.38708687, 1.1285464, -0.41481158, 0.57584685, 0.35248268, -1.0297804, 0.30069536, -0.804142, 0.87476945, -0.32390264, -0.72924566, 0.4675096, -0.3025772, 0.14686985, -0.64332646, 0.27220792, 0.3578187, -0.5363454, 0.13928713, -0.2801098, -0.37244883, 0.5766619, 0.6739234, 0.11305577, 0.4776733, -0.07123905, -0.23289755, -0.4101974, 0.4693366, 0.044429496, -0.19437085, -0.09739445, 0.519772, 0.9965986, 0.06364854, 0.077697136, -0.33480817, 0.07536717, -0.82209444, -0.64667714, -0.20978113, 0.6434662, -0.5052611, -0.3935268, -0.45977864, 1.3920935, 0.9089069, -1.0275921, -0.45963818, 0.07254938, -0.34282392]|\n", + 
"+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "\n" + ] + } + ], + "source": [ + "result.selectExpr(\"explode(snowflake.embeddings) as embeddings\").show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YWVcqLf-LBf" + }, + "source": [ + "That's it! 
You can now go wild and use hundreds of snowflake models from HuggingFace 🤗 in Spark NLP 🚀\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "01a13838b42c4b74b86688d23f83d0e6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ab8d5f78f9f148f6882ea5f8df117b05", + "IPY_MODEL_130f3d7b83c34fcdb57c052db4f380ec", + "IPY_MODEL_e32b85e3d4914b179040727cb0a07c7c" + ], + "layout": "IPY_MODEL_c36e66d1c0ec4d3288725f79aab2c804" + } + }, + "ab8d5f78f9f148f6882ea5f8df117b05": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e523b368b4bf4730ab0d1a461b7adb0e", + "placeholder": "​", + "style": "IPY_MODEL_ab98619de7e3408a9f996750afc5b710", + "value": "config.json: 100%" + } + }, + "130f3d7b83c34fcdb57c052db4f380ec": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f54d071adef04bc2aeafbe092421de2f", + "max": 738, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_00b2642351554eb7ad3d0a18b441370d", + "value": 738 + } + }, + "e32b85e3d4914b179040727cb0a07c7c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_914f40f0500d4cafa0413cc07e4bd316", + "placeholder": "​", + "style": "IPY_MODEL_e4b8ca47ec964020977ffc7e83a6116a", + "value": " 738/738 [00:00<00:00, 5.65kB/s]" + } + }, + "c36e66d1c0ec4d3288725f79aab2c804": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e523b368b4bf4730ab0d1a461b7adb0e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ab98619de7e3408a9f996750afc5b710": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f54d071adef04bc2aeafbe092421de2f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "00b2642351554eb7ad3d0a18b441370d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "914f40f0500d4cafa0413cc07e4bd316": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e4b8ca47ec964020977ffc7e83a6116a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "01de9036be0c44699be8dc1581ed9677": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_443d8cc16ad24d8c9e47408df83a6c53", + "IPY_MODEL_d7669040f645442781b7d51cb7262201", + "IPY_MODEL_e9333e4b41bf4067885461897bd3c0d9" + ], + "layout": "IPY_MODEL_977eb0478f814cb296c41fd5d1a2c97f" + } + }, + "443d8cc16ad24d8c9e47408df83a6c53": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c49df48d81c0463e95fe32c5225e3464", + "placeholder": "​", + "style": "IPY_MODEL_df2cb36d5f514461bd23ee0b10fe177f", + "value": "model.safetensors: 100%" + } + }, + "d7669040f645442781b7d51cb7262201": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e7b37140377e415abdda1d54a36c866f", + "max": 435588776, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1013375302bb40d59aec7e5863923fbd", + "value": 435588776 + } + }, + "e9333e4b41bf4067885461897bd3c0d9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0f67f4199ceb40bd9b933db1206e7122", + "placeholder": "​", + "style": "IPY_MODEL_8afec1bc2fe74a1b85709b2cd1970e17", + "value": " 436M/436M [00:05<00:00, 82.5MB/s]" + } + }, + "977eb0478f814cb296c41fd5d1a2c97f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c49df48d81c0463e95fe32c5225e3464": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + 
"border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "df2cb36d5f514461bd23ee0b10fe177f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e7b37140377e415abdda1d54a36c866f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1013375302bb40d59aec7e5863923fbd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0f67f4199ceb40bd9b933db1206e7122": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8afec1bc2fe74a1b85709b2cd1970e17": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7c49fcb0adf24fffacfc78f219ee2458": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3b5c54406b66456b9385d2e43e604fe9", + "IPY_MODEL_ff793bb70a0c48b38309f1264edc5b9d", + "IPY_MODEL_1c15a50073c94145b76c692f0b07f925" + ], + "layout": "IPY_MODEL_87c0ee6209214c18aaaa357346149c14" + } + }, + "3b5c54406b66456b9385d2e43e604fe9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_954976c4327a46e0af403fbb9ff36934", + "placeholder": "​", + "style": "IPY_MODEL_6b5087b00109475f83dba93784a6ec82", + "value": "tokenizer_config.json: 100%" + } + }, + "ff793bb70a0c48b38309f1264edc5b9d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5f90c27303e44db88a38b863af98e55f", + "max": 1381, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fe8d36df978945248825a71d33b0c841", + "value": 1381 + } + }, + "1c15a50073c94145b76c692f0b07f925": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_76bac98bd4634405ac1c5c27b4dd0a7b", + "placeholder": "​", + "style": "IPY_MODEL_16dc91e1466c4b9eb4dd96a698930a34", + "value": " 1.38k/1.38k [00:00<00:00, 7.83kB/s]" + } + }, + "87c0ee6209214c18aaaa357346149c14": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "954976c4327a46e0af403fbb9ff36934": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b5087b00109475f83dba93784a6ec82": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5f90c27303e44db88a38b863af98e55f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fe8d36df978945248825a71d33b0c841": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "76bac98bd4634405ac1c5c27b4dd0a7b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "16dc91e1466c4b9eb4dd96a698930a34": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ca49cc4036424696af4112ba2ac316d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_67e095fa699f4a189bede10cf0b48c8d", + "IPY_MODEL_3786a94ac75b41b3843c37b852afcb05", + "IPY_MODEL_b13431eee37e42528dd7e090208cb09b" + ], + "layout": "IPY_MODEL_2eb9e953086f4735b28afd4fdce98aeb" + } + }, + "67e095fa699f4a189bede10cf0b48c8d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_938226ecebf746f48ea13307b4fe0f5d", + "placeholder": "​", + "style": "IPY_MODEL_530793d3faf24f4da6927256ddfeddea", + "value": "vocab.txt: 100%" + } + }, + "3786a94ac75b41b3843c37b852afcb05": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cd760140c4f340ec931f83a1464696a8", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5065495ab87244cd96c99742eff83f2a", + "value": 231508 + } + }, + "b13431eee37e42528dd7e090208cb09b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_64abfed62f454f88a5b8cadd0a5bdfdb", + "placeholder": "​", + "style": "IPY_MODEL_ca8b10b455b349bd922c7861265dc7a1", + "value": " 232k/232k [00:00<00:00, 4.33MB/s]" + } + }, + "2eb9e953086f4735b28afd4fdce98aeb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + 
"right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "938226ecebf746f48ea13307b4fe0f5d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "530793d3faf24f4da6927256ddfeddea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cd760140c4f340ec931f83a1464696a8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5065495ab87244cd96c99742eff83f2a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "64abfed62f454f88a5b8cadd0a5bdfdb": { 
+ "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ca8b10b455b349bd922c7861265dc7a1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "17d6731b17bc40e2905e578a165ee928": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6cc1ae19c71c4444bc97a99b6b4d5960", + "IPY_MODEL_c330031d110745558faaa4fa5a1f47f4", + "IPY_MODEL_d4e23f3a28a141c2bc0b559fcb7eee84" + ], + "layout": "IPY_MODEL_82c9b4084b8b4cdda57f31ead7a0296e" + } + }, + "6cc1ae19c71c4444bc97a99b6b4d5960": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0a863eed28e2448ba134e48de2a12c02", + "placeholder": "​", + "style": "IPY_MODEL_5485f261163d423d916099eb0c4e1cd9", + "value": "tokenizer.json: 100%" + } + }, + "c330031d110745558faaa4fa5a1f47f4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + 
"layout": "IPY_MODEL_d3b9645dafb84488bd951788afca600c", + "max": 711649, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bf94c73d8dd04bbabcb61da1afc31765", + "value": 711649 + } + }, + "d4e23f3a28a141c2bc0b559fcb7eee84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_23bed1e463e84c42a3c3a7a9e314f8ab", + "placeholder": "​", + "style": "IPY_MODEL_cb3d3466c0254682b7eb04cd6bff63e7", + "value": " 712k/712k [00:00<00:00, 25.4MB/s]" + } + }, + "82c9b4084b8b4cdda57f31ead7a0296e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0a863eed28e2448ba134e48de2a12c02": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5485f261163d423d916099eb0c4e1cd9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d3b9645dafb84488bd951788afca600c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf94c73d8dd04bbabcb61da1afc31765": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "23bed1e463e84c42a3c3a7a9e314f8ab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb3d3466c0254682b7eb04cd6bff63e7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a4bc7091346943f99a8c3df133d8aaec": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_24b8fd85041f459b856f7a81f4794e1e", + "IPY_MODEL_077b3a45937044099a93762ae19ab486", + "IPY_MODEL_925068f3d6ff42b8876ab2b589a00858" + ], + "layout": "IPY_MODEL_4b097fd1ba834560bd8de0537345db83" + } + }, + "24b8fd85041f459b856f7a81f4794e1e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b652a620066f4fa0b1705efcee874386", + "placeholder": "​", + "style": "IPY_MODEL_4530c6787c1444ddaf8d55c98c1e539f", + "value": "special_tokens_map.json: 100%" + } + }, + "077b3a45937044099a93762ae19ab486": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ab704bfff72b4a4d9819513647f0838e", + "max": 695, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a9bda9ed4e1243d2a914003ff93d71ad", + "value": 695 + } + }, + "925068f3d6ff42b8876ab2b589a00858": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3247354c44e343718d6be93cd153ae98", + "placeholder": "​", + "style": "IPY_MODEL_89cc2e327672474c924f8835cd7d514c", + "value": " 695/695 [00:00<00:00, 39.7kB/s]" + } + }, + "4b097fd1ba834560bd8de0537345db83": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b652a620066f4fa0b1705efcee874386": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4530c6787c1444ddaf8d55c98c1e539f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ab704bfff72b4a4d9819513647f0838e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + 
"top": null, + "visibility": null, + "width": null + } + }, + "a9bda9ed4e1243d2a914003ff93d71ad": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3247354c44e343718d6be93cd153ae98": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "89cc2e327672474c924f8835cd7d514c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/src/main/scala/com/johnsnowlabs/ml/ai/ConvNextClassifier.scala b/src/main/scala/com/johnsnowlabs/ml/ai/ConvNextClassifier.scala index 523122b93ea923..1ff4ff1352e360 100644 --- a/src/main/scala/com/johnsnowlabs/ml/ai/ConvNextClassifier.scala +++ b/src/main/scala/com/johnsnowlabs/ml/ai/ConvNextClassifier.scala @@ -21,14 +21,16 @@ import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor import com.johnsnowlabs.nlp.annotators.cv.util.io.ImageIOUtils import com.johnsnowlabs.nlp.annotators.cv.util.transform.ImageResizeUtils +import com.johnsnowlabs.ml.onnx.OnnxWrapper private[johnsnowlabs] class ConvNextClassifier( - tensorflowWrapper: TensorflowWrapper, + tensorflowWrapper: Option[TensorflowWrapper], + onnxWrapper: Option[OnnxWrapper], configProtoBytes: Option[Array[Byte]] = None, tags: Map[String, BigInt], preprocessor: Preprocessor, signatures: Option[Map[String, String]] = None) - extends ViTClassifier(tensorflowWrapper, configProtoBytes, tags, preprocessor, signatures) { + extends ViTClassifier(tensorflowWrapper, onnxWrapper, configProtoBytes, tags, preprocessor, signatures) { override def encode( annotations: Array[AnnotationImage], diff 
--git a/src/main/scala/com/johnsnowlabs/ml/ai/ViTClassifier.scala b/src/main/scala/com/johnsnowlabs/ml/ai/ViTClassifier.scala index 6a7e81171627c9..bd827fb53293b5 100644 --- a/src/main/scala/com/johnsnowlabs/ml/ai/ViTClassifier.scala +++ b/src/main/scala/com/johnsnowlabs/ml/ai/ViTClassifier.scala @@ -16,6 +16,9 @@ package com.johnsnowlabs.ml.ai +import ai.onnxruntime.OnnxTensor +import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper} +import com.johnsnowlabs.ml.util.{ONNX, TensorFlow} import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager} import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper} import com.johnsnowlabs.nlp._ @@ -26,7 +29,8 @@ import com.johnsnowlabs.nlp.annotators.cv.util.transform.ImageResizeUtils import scala.collection.JavaConverters._ private[johnsnowlabs] class ViTClassifier( - val tensorflowWrapper: TensorflowWrapper, + val tensorflowWrapper: Option[TensorflowWrapper], + val onnxWrapper: Option[OnnxWrapper], configProtoBytes: Option[Array[Byte]] = None, tags: Map[String, BigInt], preprocessor: Preprocessor, @@ -35,6 +39,11 @@ private[johnsnowlabs] class ViTClassifier( val _tfViTSignatures: Map[String, String] = signatures.getOrElse(ModelSignatureManager.apply()) + val detectedEngine: String = + if (tensorflowWrapper.isDefined) TensorFlow.name + else if (onnxWrapper.isDefined) ONNX.name + else TensorFlow.name + private val onnxSessionOptions: Map[String, String] = new OnnxSession().getSessionOptions private def sessionWarmup(): Unit = { val image = @@ -48,17 +57,16 @@ private[johnsnowlabs] class ViTClassifier( sessionWarmup() - def tag( - batch: Array[Array[Array[Array[Float]]]], - activation: String = ActivationFunction.softmax): Array[Array[Float]] = { - val tensors = new TensorResources() - val batchLength = batch.length + def getRawScoresWithTF(batch: Array[Array[Array[Array[Float]]]]): Array[Float] = { + val tensors = new TensorResources() val imageTensors = tensors.createTensor(batch) - val runner = tensorflowWrapper - .getTFSessionWithSignature(configProtoBytes = configProtoBytes, initAllTables = false) - .runner + val session = tensorflowWrapper.get.getTFSessionWithSignature( + configProtoBytes = configProtoBytes, + savedSignatures = signatures, + initAllTables = false) + val runner = session.runner runner .feed( @@ -74,7 +82,39 @@ private[johnsnowlabs] class ViTClassifier( tensors.clearSession(outs) tensors.clearTensors() imageTensors.close() + rawScores + } + + def getRowScoresWithOnnx(batch: Array[Array[Array[Array[Float]]]]): Array[Float] = { + val (runner, env) = onnxWrapper.get.getSession(onnxSessionOptions) + val imageTensors = OnnxTensor.createTensor(env,batch) + val inputs = + Map( + "pixel_values" -> imageTensors).asJava + + val results = runner.run(inputs) + val rawScores = results + .get("logits") + .get() + .asInstanceOf[OnnxTensor] + .getFloatBuffer + .array() + + results.close() + imageTensors.close() + rawScores + } + + def tag( + batch: Array[Array[Array[Array[Float]]]], + activation: String = ActivationFunction.softmax): Array[Array[Float]] = { + + val batchLength = batch.length + val rawScores = detectedEngine match { + case ONNX.name => getRowScoresWithOnnx(batch) + case _ => getRawScoresWithTF(batch) + } val dim = rawScores.length / batchLength val batchScores: Array[Array[Float]] = rawScores diff --git a/src/main/scala/com/johnsnowlabs/ml/ai/VisionEncoderDecoder.scala b/src/main/scala/com/johnsnowlabs/ml/ai/VisionEncoderDecoder.scala index 6ee428f65c4f6e..3cf0b900b6aa86 100644 --- 
a/src/main/scala/com/johnsnowlabs/ml/ai/VisionEncoderDecoder.scala +++ b/src/main/scala/com/johnsnowlabs/ml/ai/VisionEncoderDecoder.scala @@ -18,8 +18,12 @@ package com.johnsnowlabs.ml.ai import ai.onnxruntime.{OnnxTensor, OrtEnvironment, OrtSession} import com.johnsnowlabs.ml.ai.util.Generation.{Generate, GenerationConfig} +import com.johnsnowlabs.ml.onnx.OnnxSession +import com.johnsnowlabs.ml.onnx.OnnxWrapper.EncoderDecoderWithoutPastWrappers +import com.johnsnowlabs.ml.onnx.TensorResources.implicits._ import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager} import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper} +import com.johnsnowlabs.ml.util.{ONNX, TensorFlow} import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor import com.johnsnowlabs.nlp.annotators.cv.util.io.ImageIOUtils @@ -31,7 +35,8 @@ import org.tensorflow.{Session, Tensor} import scala.collection.JavaConverters._ private[johnsnowlabs] class VisionEncoderDecoder( - val tensorflowWrapper: TensorflowWrapper, + val tensorflowWrapper: Option[TensorflowWrapper], + val onnxWrappers: Option[EncoderDecoderWithoutPastWrappers], configProtoBytes: Option[Array[Byte]] = None, tokenizer: Gpt2Tokenizer, preprocessor: Preprocessor, @@ -43,7 +48,12 @@ private[johnsnowlabs] class VisionEncoderDecoder( signatures.getOrElse(ModelSignatureManager.apply()) val tensorResources = new TensorResources() + private val onnxSessionOptions: Map[String, String] = new OnnxSession().getSessionOptions + val detectedEngine: String = + if (tensorflowWrapper.isDefined) TensorFlow.name + else if (onnxWrappers.isDefined) ONNX.name + else throw new IllegalArgumentException("No model engine defined.") private def sessionWarmup(): Unit = { val nChannels = 3 val dummyInput = Array( @@ -99,6 +109,15 @@ private[johnsnowlabs] class VisionEncoderDecoder( } } + private object OnnxSignatures { + val encoderInputIdsTensor: String = "pixel_values" + val encoderOutputKey = "last_hidden_state" + val decoderOutputKey: String = "logits" + val decoderInputIDs: String = "input_ids" + val decoderEncoderState: String = "encoder_hidden_states" + + } + private def preprocessImages( annotations: Array[AnnotationImage]): Array[Array[Array[Array[Float]]]] = { @@ -143,65 +162,38 @@ private[johnsnowlabs] class VisionEncoderDecoder( private def encodeImages( batch: Array[Array[Array[Array[Float]]]], beamSize: Int, - session: Session): Tensor = { + tfSession: Option[Session], + onnxSession: Option[(OrtSession, OrtEnvironment)]): AutoCloseable = { - // Expand the array for each beam val batchForBeams = batch.flatMap(imageFloats => Array.fill(beamSize)(imageFloats)) - val imageTensors = tensorResources.createTensor(batchForBeams) - - val runner = session.runner - .feed(TfSignatures.InputOps.encoderInput, imageTensors) - .fetch(TfSignatures.OutputOps.encoderState) - - val outs = runner.run().asScala - - outs.head - } - - def generate( - decoderInputIds: Array[Array[Int]], - decoderEncoderStateTensors: Tensor, - maxOutputLength: Int, - minOutputLength: Int, - doSample: Boolean, - beamSize: Int, - numReturnSequences: Int, - temperature: Double, - topK: Int, - topP: Double, - repetitionPenalty: Double, - noRepeatNgramSize: Int, - randomSeed: Option[Long], - session: Session): Array[Array[Int]] = { - - // Not used for vision encoders, but needed for batch size. Change in refactor. 
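As a rough standalone sketch of the convention the new ONNX paths rely on — a preprocessed batch fed as `pixel_values`, a flat float buffer read back from `logits` (or `last_hidden_state` for the encoder) — the following runs an exported image classifier directly with ONNX Runtime. The model path, the 3×224×224 input shape, and the single-image batch are illustrative assumptions, not part of this patch.

```scala
import ai.onnxruntime.{OnnxTensor, OrtEnvironment, OrtSession}
import scala.collection.JavaConverters._

object OnnxImageClassifierSketch {
  def main(args: Array[String]): Unit = {
    val env = OrtEnvironment.getEnvironment
    // Hypothetical path to an ONNX export of a ViT/ConvNext-style classifier.
    val session = env.createSession("/tmp/convnext_onnx/model.onnx", new OrtSession.SessionOptions())

    // One dummy image in NCHW layout: batch x channels x height x width, all zeros.
    val batch: Array[Array[Array[Array[Float]]]] =
      Array(Array.fill(3, 224, 224)(0.0f))

    val pixelValues = OnnxTensor.createTensor(env, batch)
    val results = session.run(Map("pixel_values" -> pixelValues).asJava)

    // Same flat extraction the classifier code uses: logits as one float array.
    val rawScores = results
      .get("logits").get()
      .asInstanceOf[OnnxTensor]
      .getFloatBuffer
      .array()

    results.close()
    pixelValues.close()

    // Softmax over the single image's logits, then report the arg-max class id.
    val maxLogit = rawScores.max
    val expScores = rawScores.map(s => math.exp(s - maxLogit))
    val probabilities = expScores.map(_ / expScores.sum)
    println(s"Top class id: ${probabilities.indexOf(probabilities.max)}")

    session.close()
  }
}
```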
- val encoderIds: Array[Array[Int]] = Array.fill(decoderInputIds.length)(Array.empty) - - // Not used - val encoderAttentionMaskTensors = null - - generate( - inputIds = encoderIds, - decoderEncoderStateTensors = Left(decoderEncoderStateTensors), - encoderAttentionMaskTensors = Left(encoderAttentionMaskTensors), - decoderInputs = decoderInputIds, - maxOutputLength, - minOutputLength, - doSample, - beamSize, - numReturnSequences, - temperature, - topK, - topP, - repetitionPenalty, - noRepeatNgramSize, - generationConfig.vocabSize, - generationConfig.eosId, - generationConfig.padId, - randomSeed, - Array.empty, - Left(session)) + detectedEngine match { + case TensorFlow.name => + // Expand the array for each beam + + val imageTensors = tensorResources.createTensor(batchForBeams) + + val runner = tfSession.get.runner + .feed(TfSignatures.InputOps.encoderInput, imageTensors) + .fetch(TfSignatures.OutputOps.encoderState) + + val outs = runner.run().asScala + outs.head + + case ONNX.name => + val (session, env) = onnxSession.get + val imageTensors = OnnxTensor.createTensor(env, batchForBeams) + val encoderResults = session + .run(Map(OnnxSignatures.encoderInputIdsTensor -> imageTensors).asJava) + val output = encoderResults + .get(OnnxSignatures.encoderOutputKey) + .get() + .asInstanceOf[OnnxTensor] + output + + case _ => + throw new IllegalArgumentException("Unknown engine type.") + } } def generateFromImage( @@ -219,35 +211,80 @@ private[johnsnowlabs] class VisionEncoderDecoder( noRepeatNgramSize: Int, randomSeed: Option[Long]): Seq[Annotation] = { - val session: Session = tensorflowWrapper - .getTFSessionWithSignature(configProtoBytes = configProtoBytes, initAllTables = false) - val captions: Seq[Annotation] = images .grouped(batchSize) .flatMap { batch => val batchSize = batch.length - val preprocessedImages = preprocessImages(images) - val encodedImages = encodeImages(preprocessedImages, beamSize, session) - val batchDecoderStartIds = Array.fill(batchSize, 1)(generationConfig.bosId) + val encoderIds: Array[Array[Int]] = Array.fill(batchDecoderStartIds.length)(Array.empty) val generatedTokenIds = - generate( - batchDecoderStartIds, - decoderEncoderStateTensors = encodedImages, - maxOutputLength, - minOutputLength, - doSample, - beamSize, - numReturnSequences, - temperature, - topK, - topP, - repetitionPenalty, - noRepeatNgramSize, - randomSeed, - session) + detectedEngine match { + case TensorFlow.name => + val session: Session = tensorflowWrapper.get + .getTFSessionWithSignature( + configProtoBytes = configProtoBytes, + initAllTables = false) + val encodedImages = encodeImages(preprocessedImages, beamSize, Some(session), None) + .asInstanceOf[Tensor] + generate( + inputIds = encoderIds, + decoderEncoderStateTensors = Left(encodedImages), + encoderAttentionMaskTensors = null, + decoderInputs = batchDecoderStartIds, + maxOutputLength, + minOutputLength, + doSample, + beamSize, + numReturnSequences, + temperature, + topK, + topP, + repetitionPenalty, + noRepeatNgramSize, + generationConfig.vocabSize, + generationConfig.eosId, + generationConfig.padId, + randomSeed, + Array.empty, + Left(session)) + case ONNX.name => + val (encoderSession, encoderEnv) = + onnxWrappers.get.encoder.getSession(onnxSessionOptions) + val (decoderSession, decoderEnv) = + onnxWrappers.get.decoder.getSession(onnxSessionOptions) + val encodedImages = + encodeImages( + preprocessedImages, + beamSize, + None, + Some((encoderSession, encoderEnv))) + .asInstanceOf[OnnxTensor] + generate( + inputIds = batchDecoderStartIds, + 
decoderEncoderStateTensors = Right(encodedImages), + encoderAttentionMaskTensors = + Right(OnnxTensor.createTensor(encoderEnv, Array(1))), + decoderInputs = batchDecoderStartIds, + maxOutputLength, + minOutputLength, + doSample, + beamSize, + numReturnSequences, + temperature, + topK, + topP, + repetitionPenalty, + noRepeatNgramSize, + generationConfig.vocabSize, + generationConfig.eosId, + generationConfig.padId, + randomSeed, + Array.empty, + Right((decoderEnv, decoderSession))) + + } val decodedStringsBatch = generatedTokenIds.map(tokenizer.decodeTokens).map(_.trim) @@ -269,7 +306,7 @@ private[johnsnowlabs] class VisionEncoderDecoder( } .toSeq - tensorResources.clearTensors() + //tensorResources.clearTensors() captions } @@ -299,31 +336,65 @@ private[johnsnowlabs] class VisionEncoderDecoder( maxLength: Int, session: Either[Session, (OrtEnvironment, OrtSession)], ovInferRequest: Option[InferRequest]): Array[Array[Float]] = { - val sess: Session = session.left.get - val decoderEncoderStateTensor: Tensor = decoderEncoderStateTensors.left.get - getModelOutput(decoderInputIds, decoderEncoderStateTensor, sess) + getModelOutput(decoderInputIds, decoderEncoderStateTensors, session) } def getModelOutput( decoderInputIds: Seq[Array[Int]], - decoderEncoderStateTensors: Tensor, - session: Session) = { - - val decoderInputIdsTensor = tensorResources.createTensor(decoderInputIds.toArray) - val runner = - session - .runner() - .feed(TfSignatures.InputOps.decoderEncoderState, decoderEncoderStateTensors) - .feed(TfSignatures.InputOps.decoderInputIds, decoderInputIdsTensor) - .fetch(TfSignatures.OutputOps.decoderLogits) - - val decoderOuts = runner.run().asScala - val logitsRaw = TensorResources.extractFloats(decoderOuts.head) - decoderOuts.head.close() - - val logits = logitsRaw.grouped(generationConfig.vocabSize) + decoderEncoderStateTensors: Either[Tensor, OnnxTensor], + session: Either[Session, (OrtEnvironment, OrtSession)]) = { + + val decoderEncoderStateTensor = decoderEncoderStateTensors.fold( + tfTensor => { + // not implemented yet + null + }, + onnxTensor => onnxTensor) + + detectedEngine match { + case TensorFlow.name => + val decoderInputIdsTensor = tensorResources.createTensor(decoderInputIds.toArray) + + val runner = + session.left.get + .runner() + .feed(TfSignatures.InputOps.decoderEncoderState, decoderEncoderStateTensors.left.get) + .feed(TfSignatures.InputOps.decoderInputIds, decoderInputIdsTensor) + .fetch(TfSignatures.OutputOps.decoderLogits) + + val decoderOuts = runner.run().asScala + val logitsRaw = TensorResources.extractFloats(decoderOuts.head) + decoderOuts.head.close() + + val logits = logitsRaw.grouped(generationConfig.vocabSize) + + logits.toArray + case ONNX.name => + val (env, decoderSession) = session.right.get + val decoderInputIdsLong: Array[Array[Long]] = + decoderInputIds.toArray.map { tokenIds => tokenIds.map(_.toLong) } + + val decoderInputIdsTensor = + OnnxTensor.createTensor(env, decoderInputIdsLong) + + val decoderInputs: java.util.Map[String, OnnxTensor] = Map( + OnnxSignatures.decoderInputIDs -> decoderInputIdsTensor, + OnnxSignatures.decoderEncoderState -> decoderEncoderStateTensor).asJava + val sessionOutput = decoderSession.run(decoderInputs) + + val sequenceLength = decoderInputIds.head.length + val batchSize = decoderInputIds.length + + val logitsRaw = sessionOutput.getFloatArray(OnnxSignatures.decoderOutputKey) + val decoderOutputs = (0 until batchSize).map(i => { + logitsRaw + .slice( + i * sequenceLength * generationConfig.vocabSize + (sequenceLength - 
1) * generationConfig.vocabSize, + i * sequenceLength * generationConfig.vocabSize + sequenceLength * generationConfig.vocabSize) + }) + decoderOutputs.toArray - logits.toArray + } } } diff --git a/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoBertaClassification.scala b/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoBertaClassification.scala index 2654d7d6198e4c..3c8e849aa46fed 100644 --- a/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoBertaClassification.scala +++ b/src/main/scala/com/johnsnowlabs/ml/ai/XlmRoBertaClassification.scala @@ -132,7 +132,6 @@ private[johnsnowlabs] class XlmRoBertaClassification( case ONNX.name => getRowScoresWithOnnx(batch) case _ => getRawScoresWithTF(batch, maxSentenceLength) } - println(rawScores.mkString("Array(", ", ", ")")) val dim = rawScores.length / (batchLength * maxSentenceLength) val batchScores: Array[Array[Array[Float]]] = rawScores .grouped(dim) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ConvNextForImageClassification.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ConvNextForImageClassification.scala index 3af9710b12ff9b..890b29957d5587 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ConvNextForImageClassification.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ConvNextForImageClassification.scala @@ -17,13 +17,14 @@ package com.johnsnowlabs.nlp.annotators.cv import com.johnsnowlabs.ml.ai.ConvNextClassifier +import com.johnsnowlabs.ml.onnx.{OnnxWrapper, ReadOnnxModel, WriteOnnxModel} import com.johnsnowlabs.ml.tensorflow.{ReadTensorflowModel, TensorflowWrapper} import com.johnsnowlabs.ml.util.LoadExternalModel.{ loadJsonStringAsset, modelSanityCheck, notSupportedEngineError } -import com.johnsnowlabs.ml.util.TensorFlow +import com.johnsnowlabs.ml.util.{ONNX, TensorFlow} import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor import org.apache.spark.broadcast.Broadcast @@ -183,16 +184,18 @@ class ConvNextForImageClassification(override val uid: String) /** @group getParam */ override def getModelIfNotSet: ConvNextClassifier = _model.get.value - override def setModelIfNotSet( + override def setModelIfNotSet( spark: SparkSession, - tensorflow: TensorflowWrapper, + tensorflowWrapper: Option[TensorflowWrapper], + onnxWrapper: Option[OnnxWrapper], preprocessor: Preprocessor): ConvNextForImageClassification.this.type = { if (_model.isEmpty) { _model = Some( spark.sparkContext.broadcast( new ConvNextClassifier( - tensorflow, + tensorflowWrapper, + onnxWrapper, configProtoBytes = getConfigProtoBytes, tags = $$(labels), preprocessor = preprocessor, @@ -257,13 +260,25 @@ class ConvNextForImageClassification(override val uid: String) } override def onWrite(path: String, spark: SparkSession): Unit = { - writeTensorflowModelV2( - path, - spark, - getModelIfNotSet.tensorflowWrapper, - "_image_classification", - ConvNextForImageClassification.tfFile, - configProtoBytes = getConfigProtoBytes) + val suffix = "_image_classification" + + getEngine match { + case TensorFlow.name => + writeTensorflowModelV2( + path, + spark, + getModelIfNotSet.tensorflowWrapper.get, + suffix, + ConvNextForImageClassification.tfFile, + configProtoBytes = getConfigProtoBytes) + case ONNX.name => + writeOnnxModel( + path, + spark, + getModelIfNotSet.onnxWrapper.get, + suffix, + ConvNextForImageClassification.onnxFile) + } } } @@ -287,17 +302,19 @@ trait ReadablePretrainedConvNextForImageModel remoteLoc: String): ConvNextForImageClassification = super.pretrained(name, lang, remoteLoc) } 
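The ONNX branch of `VisionEncoderDecoder.getModelOutput` above flattens the decoder output of shape `[batch, sequence, vocab]` and keeps only the logits of the last decoder position for each batch element. A small self-contained worked example of that index arithmetic, with made-up sizes purely for illustration:

```scala
object DecoderSliceSketch extends App {
  val batchSize = 2
  val sequenceLength = 3
  val vocabSize = 4

  // Fake flat logits: each value encodes (batch, position, token) for easy inspection.
  val logitsRaw: Array[Float] =
    (for {
      b <- 0 until batchSize
      s <- 0 until sequenceLength
      v <- 0 until vocabSize
    } yield (b * 100 + s * 10 + v).toFloat).toArray

  // Same slice as in getModelOutput: the vocab-sized block of the last position.
  val lastPositionLogits = (0 until batchSize).map { i =>
    logitsRaw.slice(
      i * sequenceLength * vocabSize + (sequenceLength - 1) * vocabSize,
      i * sequenceLength * vocabSize + sequenceLength * vocabSize)
  }

  // Prints Vector(20.0, 21.0, 22.0, 23.0) and Vector(120.0, 121.0, 122.0, 123.0):
  // only the logits for position index 2 (the last one) of each batch element.
  lastPositionLogits.foreach(a => println(a.toVector))
}
```

Only the last position is kept because beam/greedy generation consumes one step of logits at a time; the earlier positions were already scored in previous iterations.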
-trait ReadConvNextForImageDLModel extends ReadTensorflowModel { +trait ReadConvNextForImageDLModel + extends ReadTensorflowModel + with ReadOnnxModel { this: ParamsAndFeaturesReadable[ConvNextForImageClassification] => override val tfFile: String = "image_classification_convnext_tensorflow" + override val onnxFile: String = "image_classification_convnext_onnx" - def readTensorflow( - instance: ConvNextForImageClassification, - path: String, - spark: SparkSession): Unit = { + def readModel( + instance: ConvNextForImageClassification, + path: String, + spark: SparkSession): Unit = { - val tf = readTensorflowModel(path, spark, "_image_classification_tf") val preprocessor = Preprocessor( do_normalize = instance.getDoNormalize, @@ -310,71 +327,97 @@ trait ReadConvNextForImageDLModel extends ReadTensorflowModel { rescale_factor = instance.getRescaleFactor, size = instance.getSize, crop_pct = Option(instance.getCropPct)) - - instance.setModelIfNotSet(spark, tf, preprocessor) - - } - - addReader(readTensorflow) - - def loadSavedModel(modelPath: String, spark: SparkSession): ConvNextForImageClassification = { - - val (localModelPath, detectedEngine) = modelSanityCheck(modelPath) - - // TODO: sometimes results in [String, BigInt] where BigInt is actually a string - val labelJsonContent = loadJsonStringAsset(localModelPath, "labels.json") - val labelJsonMap = - parse(labelJsonContent, useBigIntForLong = true).values - .asInstanceOf[Map[String, BigInt]] - - val preprocessorConfigJsonContent = - loadJsonStringAsset(localModelPath, "preprocessor_config.json") - val preprocessorConfig = - Preprocessor.loadPreprocessorConfig(preprocessorConfigJsonContent) - - require( - preprocessorConfig.size >= 384 || preprocessorConfig.crop_pct.nonEmpty, - "Property \'crop_pct\' should be defined, if size < 384.") - val cropPct = preprocessorConfig.crop_pct.get - - val annotatorModel = new ConvNextForImageClassification() - .setLabels(labelJsonMap) - .setDoNormalize(preprocessorConfig.do_normalize) - .setDoResize(preprocessorConfig.do_resize) - .setFeatureExtractorType(preprocessorConfig.feature_extractor_type) - .setImageMean(preprocessorConfig.image_mean) - .setImageStd(preprocessorConfig.image_std) - .setResample(preprocessorConfig.resample) - .setSize(preprocessorConfig.size) - .setDoRescale(preprocessorConfig.do_rescale) - .setRescaleFactor(preprocessorConfig.rescale_factor) - .setCropPct(cropPct) - - annotatorModel.set(annotatorModel.engine, detectedEngine) - - detectedEngine match { + instance.getEngine match { case TensorFlow.name => - val (wrapper, signatures) = - TensorflowWrapper.read(localModelPath, zipped = false, useBundle = true) - - val _signatures = signatures match { - case Some(s) => s - case None => throw new Exception("Cannot load signature definitions from model!") - } - - /** the order of setSignatures is important if we use getSignatures inside - * setModelIfNotSet - */ - annotatorModel - .setSignatures(_signatures) - .setModelIfNotSet(spark, wrapper, preprocessorConfig) + val tfWrapper = + readTensorflowModel(path, spark, tfFile, initAllTables = false) + + instance.setModelIfNotSet(spark, Some(tfWrapper), None, preprocessor) + case ONNX.name => + val onnxWrapper = + readOnnxModel( + path, + spark, + onnxFile, + zipped = true, + useBundle = false, + None) + + instance.setModelIfNotSet(spark, None, Some(onnxWrapper), preprocessor) case _ => throw new Exception(notSupportedEngineError) } +} + - annotatorModel + addReader(readModel) + def loadSavedModel(modelPath: String, spark: SparkSession): 
ConvNextForImageClassification = { + + val (localModelPath, detectedEngine) = modelSanityCheck(modelPath) + + // TODO: sometimes results in [String, BigInt] where BigInt is actually a string + val labelJsonContent = loadJsonStringAsset(localModelPath, "labels.json") + val labelJsonMap = + parse(labelJsonContent, useBigIntForLong = true).values + .asInstanceOf[Map[String, BigInt]] + + val preprocessorConfigJsonContent = + loadJsonStringAsset(localModelPath, "preprocessor_config.json") + val preprocessorConfig = + Preprocessor.loadPreprocessorConfig(preprocessorConfigJsonContent) + + require( + preprocessorConfig.size >= 384 || preprocessorConfig.crop_pct.nonEmpty, + "Property \'crop_pct\' should be defined, if size < 384.") + val cropPct = preprocessorConfig.crop_pct.get + + val annotatorModel = new ConvNextForImageClassification() + .setLabels(labelJsonMap) + .setDoNormalize(preprocessorConfig.do_normalize) + .setDoResize(preprocessorConfig.do_resize) + .setFeatureExtractorType(preprocessorConfig.feature_extractor_type) + .setImageMean(preprocessorConfig.image_mean) + .setImageStd(preprocessorConfig.image_std) + .setResample(preprocessorConfig.resample) + .setSize(preprocessorConfig.size) + .setDoRescale(preprocessorConfig.do_rescale) + .setRescaleFactor(preprocessorConfig.rescale_factor) + .setCropPct(cropPct) + + annotatorModel.set(annotatorModel.engine, detectedEngine) + + + detectedEngine match { + case TensorFlow.name => + val (tfwrapper, signatures) = + TensorflowWrapper.read(localModelPath, zipped = false, useBundle = true) + + val _signatures = signatures match { + case Some(s) => s + case None => throw new Exception("Cannot load signature definitions from model!") + } + + /** the order of setSignatures is important if we use getSignatures inside + * setModelIfNotSet + */ + annotatorModel + .setSignatures(_signatures) + .setModelIfNotSet(spark, Some(tfwrapper), None, preprocessorConfig) + + case ONNX.name => + val onnxWrapper = OnnxWrapper.read(spark, localModelPath, zipped = false, useBundle = true) + + annotatorModel + .setModelIfNotSet(spark, None, Some(onnxWrapper), preprocessorConfig) + + case _ => + throw new Exception(notSupportedEngineError) + } + + annotatorModel + } } -} + /** This is the companion object of [[ConvNextForImageClassification]]. Please refer to that class * for the documentation. 
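With `loadSavedModel` now dispatching on the detected engine, an ONNX export produced by the new notebook loads like any other Spark NLP model. A usage sketch, assuming the export lives in `/tmp/convnext_onnx` with `labels.json` and `preprocessor_config.json` alongside the ONNX file, and that `spark` is an active SparkSession with Spark NLP on the classpath:

```scala
import com.johnsnowlabs.nlp.ImageAssembler
import com.johnsnowlabs.nlp.annotators.cv.ConvNextForImageClassification
import org.apache.spark.ml.Pipeline

val imageDF = spark.read
  .format("image")
  .option("dropInvalid", value = true)
  .load("src/test/resources/image/")

val imageAssembler = new ImageAssembler()
  .setInputCol("image")
  .setOutputCol("image_assembler")

// loadSavedModel detects the ONNX engine and wires up the new onnxWrapper path.
val imageClassifier = ConvNextForImageClassification
  .loadSavedModel("/tmp/convnext_onnx", spark)
  .setInputCols("image_assembler")
  .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
pipeline.fit(imageDF).transform(imageDF).select("class.result").show(truncate = false)
```

The same load-and-serialize pattern applies to the Swin and ViT classifiers updated in the following hunks.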
diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/SwinForImageClassification.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/SwinForImageClassification.scala index ebf815c87b2b29..adf47efbac9f12 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/SwinForImageClassification.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/SwinForImageClassification.scala @@ -16,13 +16,14 @@ package com.johnsnowlabs.nlp.annotators.cv +import com.johnsnowlabs.ml.onnx.{OnnxWrapper, ReadOnnxModel, WriteOnnxModel} import com.johnsnowlabs.ml.tensorflow.{ReadTensorflowModel, TensorflowWrapper} import com.johnsnowlabs.ml.util.LoadExternalModel.{ loadJsonStringAsset, modelSanityCheck, notSupportedEngineError } -import com.johnsnowlabs.ml.util.TensorFlow +import com.johnsnowlabs.ml.util.{ONNX, TensorFlow} import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor import org.apache.spark.ml.util.Identifiable @@ -218,13 +219,26 @@ class SwinForImageClassification(override val uid: String) } override def onWrite(path: String, spark: SparkSession): Unit = { - writeTensorflowModelV2( - path, - spark, - getModelIfNotSet.tensorflowWrapper, - "_image_classification", - SwinForImageClassification.tfFile, - configProtoBytes = getConfigProtoBytes) + super.onWrite(path, spark) + val suffix = "_image_classification" + + getEngine match { + case TensorFlow.name => + writeTensorflowModelV2( + path, + spark, + getModelIfNotSet.tensorflowWrapper.get, + suffix, + SwinForImageClassification.tfFile, + configProtoBytes = getConfigProtoBytes) + case ONNX.name => + writeOnnxModel( + path, + spark, + getModelIfNotSet.onnxWrapper.get, + suffix, + SwinForImageClassification.onnxFile) + } } } @@ -249,18 +263,19 @@ trait ReadablePretrainedSwinForImageModel remoteLoc: String): SwinForImageClassification = super.pretrained(name, lang, remoteLoc) } -trait ReadSwinForImageDLModel extends ReadTensorflowModel { +trait ReadSwinForImageDLModel + extends ReadTensorflowModel + with ReadOnnxModel { this: ParamsAndFeaturesReadable[SwinForImageClassification] => override val tfFile: String = "image_classification_swin_tensorflow" + override val onnxFile: String = "image_classification_swin_onnx" - def readTensorflow( + def readModel( instance: SwinForImageClassification, path: String, spark: SparkSession): Unit = { - val tf = readTensorflowModel(path, spark, "_image_classification_tf") - val preprocessor = Preprocessor( do_normalize = instance.getDoNormalize, do_resize = instance.getDoRescale, @@ -272,10 +287,30 @@ trait ReadSwinForImageDLModel extends ReadTensorflowModel { rescale_factor = instance.getRescaleFactor, size = instance.getSize) - instance.setModelIfNotSet(spark, tf, preprocessor) + instance.getEngine match { + case TensorFlow.name => + val tfWrapper = + readTensorflowModel(path, spark, tfFile, initAllTables = false) + + instance.setModelIfNotSet(spark, Some(tfWrapper), None, preprocessor) + case ONNX.name => + val onnxWrapper = + readOnnxModel( + path, + spark, + onnxFile, + zipped = true, + useBundle = false, + None) + + instance.setModelIfNotSet(spark, None, Some(onnxWrapper), preprocessor) + case _ => + throw new Exception(notSupportedEngineError) + + } } - addReader(readTensorflow) + addReader(readModel) def loadSavedModel(modelPath: String, spark: SparkSession): SwinForImageClassification = { @@ -316,13 +351,20 @@ trait ReadSwinForImageDLModel extends ReadTensorflowModel { case Some(s) => s case None => throw new Exception("Cannot load signature 
definitions from model!") } - /** the order of setSignatures is important if we use getSignatures inside - * setModelIfNotSet - */ + * setModelIfNotSet + */ annotatorModel .setSignatures(_signatures) - .setModelIfNotSet(spark, wrapper, preprocessorConfig) + .setModelIfNotSet(spark, Some(wrapper), None, preprocessorConfig) + case ONNX.name => + val onnxWrapper = OnnxWrapper.read(spark, localModelPath, zipped = false, useBundle = true) + + annotatorModel + .setModelIfNotSet(spark, None, Some(onnxWrapper), preprocessorConfig) + + + case _ => throw new Exception(notSupportedEngineError) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ViTForImageClassification.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ViTForImageClassification.scala index 985fbc041251a5..3fbfc4877adaec 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ViTForImageClassification.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/ViTForImageClassification.scala @@ -22,12 +22,13 @@ import com.johnsnowlabs.ml.tensorflow.{ TensorflowWrapper, WriteTensorflowModel } +import com.johnsnowlabs.ml.onnx.{OnnxWrapper, ReadOnnxModel, WriteOnnxModel} import com.johnsnowlabs.ml.util.LoadExternalModel.{ loadJsonStringAsset, modelSanityCheck, notSupportedEngineError } -import com.johnsnowlabs.ml.util.TensorFlow +import com.johnsnowlabs.ml.util.{ONNX, TensorFlow} import com.johnsnowlabs.nlp.AnnotatorType.{CATEGORY, IMAGE} import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor @@ -144,6 +145,7 @@ class ViTForImageClassification(override val uid: String) with HasBatchedAnnotateImage[ViTForImageClassification] with HasImageFeatureProperties with WriteTensorflowModel + with WriteOnnxModel with HasEngine { /** Annotator reference id. 
Used to identify elements in metadata or to refer to this annotator @@ -227,14 +229,16 @@ class ViTForImageClassification(override val uid: String) /** @group setParam */ def setModelIfNotSet( spark: SparkSession, - tensorflow: TensorflowWrapper, + tensorflowWrapper: Option[TensorflowWrapper], + onnxWrapper: Option[OnnxWrapper], preprocessor: Preprocessor): this.type = { if (_model.isEmpty) { _model = Some( spark.sparkContext.broadcast( new ViTClassifier( - tensorflow, + tensorflowWrapper, + onnxWrapper, configProtoBytes = getConfigProtoBytes, tags = $$(labels), preprocessor = preprocessor, @@ -300,13 +304,25 @@ class ViTForImageClassification(override val uid: String) override def onWrite(path: String, spark: SparkSession): Unit = { super.onWrite(path, spark) - writeTensorflowModelV2( - path, - spark, - getModelIfNotSet.tensorflowWrapper, - "_image_classification", - ViTForImageClassification.tfFile, - configProtoBytes = getConfigProtoBytes) + val suffix = "_image_classification" + + getEngine match { + case TensorFlow.name => + writeTensorflowModelV2( + path, + spark, + getModelIfNotSet.tensorflowWrapper.get, + suffix, + ViTForImageClassification.tfFile, + configProtoBytes = getConfigProtoBytes) + case ONNX.name => + writeOnnxModel( + path, + spark, + getModelIfNotSet.onnxWrapper.get, + suffix, + ViTForImageClassification.onnxFile) + } } } @@ -330,15 +346,16 @@ trait ReadablePretrainedViTForImageModel remoteLoc: String): ViTForImageClassification = super.pretrained(name, lang, remoteLoc) } -trait ReadViTForImageDLModel extends ReadTensorflowModel { +trait ReadViTForImageDLModel + extends ReadTensorflowModel + with ReadOnnxModel { this: ParamsAndFeaturesReadable[ViTForImageClassification] => override val tfFile: String = "image_classification_tensorflow" + override val onnxFile: String = "image_classification_onnx" def readModel(instance: ViTForImageClassification, path: String, spark: SparkSession): Unit = { - val tf = readTensorflowModel(path, spark, "_image_classification_tf", initAllTables = false) - val preprocessor = Preprocessor( do_normalize = true, do_resize = true, @@ -347,8 +364,30 @@ trait ReadViTForImageDLModel extends ReadTensorflowModel { instance.getImageStd, instance.getResample, instance.getSize) + instance.getEngine match { + case TensorFlow.name => + val tfWrapper = + readTensorflowModel(path, spark, tfFile, initAllTables = false) + + + instance.setModelIfNotSet(spark, Some(tfWrapper), None, preprocessor) + case ONNX.name => + val onnxWrapper = + readOnnxModel( + path, + spark, + onnxFile, + zipped = true, + useBundle = false, + None) + + instance.setModelIfNotSet(spark, None, Some(onnxWrapper), preprocessor) + case _ => + throw new Exception(notSupportedEngineError) + } + + - instance.setModelIfNotSet(spark, tf, preprocessor) } addReader(readModel) @@ -383,20 +422,25 @@ trait ReadViTForImageDLModel extends ReadTensorflowModel { detectedEngine match { case TensorFlow.name => - val (wrapper, signatures) = + val (tfwrapper, signatures) = TensorflowWrapper.read(localModelPath, zipped = false, useBundle = true) val _signatures = signatures match { case Some(s) => s case None => throw new Exception("Cannot load signature definitions from model!") } - /** the order of setSignatures is important if we use getSignatures inside * setModelIfNotSet */ annotatorModel .setSignatures(_signatures) - .setModelIfNotSet(spark, wrapper, preprocessorConfig) + .setModelIfNotSet(spark, Some(tfwrapper), None, preprocessorConfig) + + case ONNX.name => + val onnxWrapper = 
OnnxWrapper.read(spark, localModelPath, zipped = false, useBundle = true) + + annotatorModel + .setModelIfNotSet(spark, None, Some(onnxWrapper), preprocessorConfig) case _ => throw new Exception(notSupportedEngineError) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioning.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioning.scala index cdaef26629942f..c5926c389b436e 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioning.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioning.scala @@ -17,19 +17,12 @@ package com.johnsnowlabs.nlp.annotators.cv import com.johnsnowlabs.ml.ai.VisionEncoderDecoder +import com.johnsnowlabs.ml.onnx.{OnnxWrapper, ReadOnnxModel, WriteOnnxModel} import com.johnsnowlabs.ml.ai.util.Generation.GenerationConfig -import com.johnsnowlabs.ml.tensorflow.{ - ReadTensorflowModel, - TensorflowWrapper, - WriteTensorflowModel -} -import com.johnsnowlabs.ml.util.LoadExternalModel.{ - loadJsonStringAsset, - loadTextAsset, - modelSanityCheck, - notSupportedEngineError -} -import com.johnsnowlabs.ml.util.TensorFlow +import com.johnsnowlabs.ml.onnx.OnnxWrapper.EncoderDecoderWithoutPastWrappers +import com.johnsnowlabs.ml.tensorflow.{ReadTensorflowModel, TensorflowWrapper, WriteTensorflowModel} +import com.johnsnowlabs.ml.util.LoadExternalModel.{loadJsonStringAsset, loadTextAsset, modelSanityCheck, notSupportedEngineError} +import com.johnsnowlabs.ml.util.{ONNX, TensorFlow} import com.johnsnowlabs.nlp.AnnotatorType.{DOCUMENT, IMAGE} import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor @@ -44,152 +37,153 @@ import org.json4s.jackson.JsonMethods.parse import org.json4s.{DefaultFormats, JValue} /** VisionEncoderDecoder model that converts images into text captions. It allows for the use of - * pretrained vision auto-encoding models, such as ViT, BEiT, or DeiT as the encoder, in - * combination with pretrained language models, like RoBERTa, GPT2, or BERT as the decoder. - * - * Pretrained models can be loaded with `pretrained` of the companion object: - * - * {{{ - * val imageClassifier = VisionEncoderDecoderForImageCaptioning.pretrained() - * .setInputCols("image_assembler") - * .setOutputCol("caption") - * }}} - * The default model is `"image_captioning_vit_gpt2"`, if no name is provided. - * - * For available pretrained models please see the - * [[https://sparknlp.org/models?task=Image+Captioning Models Hub]]. - * - * Models from the HuggingFace 🤗 Transformers library are also compatible with Spark NLP 🚀. To - * see which models are compatible and how to import them see - * [[https://github.com/JohnSnowLabs/spark-nlp/discussions/5669]] and to see more extended - * examples, see - * [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioningTestSpec.scala VisionEncoderDecoderTestSpec]]. - * - * '''Note:''' - * - * This is a very computationally expensive module especially on larger batch sizes. The use of - * an accelerator such as GPU is recommended. 
- * - * ==Example== - * {{{ - * import com.johnsnowlabs.nlp.annotator._ - * import com.johnsnowlabs.nlp.ImageAssembler - * import org.apache.spark.ml.Pipeline - * - * val imageDF: DataFrame = spark.read - * .format("image") - * .option("dropInvalid", value = true) - * .load("src/test/resources/image/") - * - * val imageAssembler = new ImageAssembler() - * .setInputCol("image") - * .setOutputCol("image_assembler") - * - * val imageCaptioning = VisionEncoderDecoderForImageCaptioning - * .pretrained() - * .setBeamSize(2) - * .setDoSample(false) - * .setInputCols("image_assembler") - * .setOutputCol("caption") - * - * val pipeline = new Pipeline().setStages(Array(imageAssembler, imageCaptioning)) - * val pipelineDF = pipeline.fit(imageDF).transform(imageDF) - * - * pipelineDF - * .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") - * .show(truncate = false) - * - * +-----------------+---------------------------------------------------------+ - * |image_name |result | - * +-----------------+---------------------------------------------------------+ - * |palace.JPEG |[a large room filled with furniture and a large window] | - * |egyptian_cat.jpeg|[a cat laying on a couch next to another cat] | - * |hippopotamus.JPEG|[a brown bear in a body of water] | - * |hen.JPEG |[a flock of chickens standing next to each other] | - * |ostrich.JPEG |[a large bird standing on top of a lush green field] | - * |junco.JPEG |[a small bird standing on a wet ground] | - * |bluetick.jpg |[a small dog standing on a wooden floor] | - * |chihuahua.jpg |[a small brown dog wearing a blue sweater] | - * |tractor.JPEG |[a man is standing in a field with a tractor] | - * |ox.JPEG |[a large brown cow standing on top of a lush green field]| - * +-----------------+---------------------------------------------------------+ - * }}} - * - * @param uid - * required uid for storing annotator to disk - * @groupname anno Annotator types - * @groupdesc anno - * Required input and expected output annotator types - * @groupname Ungrouped Members - * @groupname param Parameters - * @groupname setParam Parameter setters - * @groupname getParam Parameter getters - * @groupname Ungrouped Members - * @groupprio param 1 - * @groupprio anno 2 - * @groupprio Ungrouped 3 - * @groupprio setParam 4 - * @groupprio getParam 5 - * @groupdesc param - * A list of (hyper-)parameter keys this annotator can take. Users can set and get the - * parameter values through setters and getters, respectively. - */ + * pretrained vision auto-encoding models, such as ViT, BEiT, or DeiT as the encoder, in + * combination with pretrained language models, like RoBERTa, GPT2, or BERT as the decoder. + * + * Pretrained models can be loaded with `pretrained` of the companion object: + * + * {{{ + * val imageClassifier = VisionEncoderDecoderForImageCaptioning.pretrained() + * .setInputCols("image_assembler") + * .setOutputCol("caption") + * }}} + * The default model is `"image_captioning_vit_gpt2"`, if no name is provided. + * + * For available pretrained models please see the + * [[https://sparknlp.org/models?task=Image+Captioning Models Hub]]. + * + * Models from the HuggingFace 🤗 Transformers library are also compatible with Spark NLP 🚀. 
To + * see which models are compatible and how to import them see + * [[https://github.com/JohnSnowLabs/spark-nlp/discussions/5669]] and to see more extended + * examples, see + * [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/VisionEncoderDecoderForImageCaptioningTestSpec.scala VisionEncoderDecoderTestSpec]]. + * + * '''Note:''' + * + * This is a very computationally expensive module especially on larger batch sizes. The use of + * an accelerator such as GPU is recommended. + * + * ==Example== + * {{{ + * import com.johnsnowlabs.nlp.annotator._ + * import com.johnsnowlabs.nlp.ImageAssembler + * import org.apache.spark.ml.Pipeline + * + * val imageDF: DataFrame = spark.read + * .format("image") + * .option("dropInvalid", value = true) + * .load("src/test/resources/image/") + * + * val imageAssembler = new ImageAssembler() + * .setInputCol("image") + * .setOutputCol("image_assembler") + * + * val imageCaptioning = VisionEncoderDecoderForImageCaptioning + * .pretrained() + * .setBeamSize(2) + * .setDoSample(false) + * .setInputCols("image_assembler") + * .setOutputCol("caption") + * + * val pipeline = new Pipeline().setStages(Array(imageAssembler, imageCaptioning)) + * val pipelineDF = pipeline.fit(imageDF).transform(imageDF) + * + * pipelineDF + * .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") + * .show(truncate = false) + * + * +-----------------+---------------------------------------------------------+ + * |image_name |result | + * +-----------------+---------------------------------------------------------+ + * |palace.JPEG |[a large room filled with furniture and a large window] | + * |egyptian_cat.jpeg|[a cat laying on a couch next to another cat] | + * |hippopotamus.JPEG|[a brown bear in a body of water] | + * |hen.JPEG |[a flock of chickens standing next to each other] | + * |ostrich.JPEG |[a large bird standing on top of a lush green field] | + * |junco.JPEG |[a small bird standing on a wet ground] | + * |bluetick.jpg |[a small dog standing on a wooden floor] | + * |chihuahua.jpg |[a small brown dog wearing a blue sweater] | + * |tractor.JPEG |[a man is standing in a field with a tractor] | + * |ox.JPEG |[a large brown cow standing on top of a lush green field]| + * +-----------------+---------------------------------------------------------+ + * }}} + * + * @param uid + * required uid for storing annotator to disk + * @groupname anno Annotator types + * @groupdesc anno + * Required input and expected output annotator types + * @groupname Ungrouped Members + * @groupname param Parameters + * @groupname setParam Parameter setters + * @groupname getParam Parameter getters + * @groupname Ungrouped Members + * @groupprio param 1 + * @groupprio anno 2 + * @groupprio Ungrouped 3 + * @groupprio setParam 4 + * @groupprio getParam 5 + * @groupdesc param + * A list of (hyper-)parameter keys this annotator can take. Users can set and get the + * parameter values through setters and getters, respectively. + */ class VisionEncoderDecoderForImageCaptioning(override val uid: String) - extends AnnotatorModel[VisionEncoderDecoderForImageCaptioning] + extends AnnotatorModel[VisionEncoderDecoderForImageCaptioning] with HasBatchedAnnotateImage[VisionEncoderDecoderForImageCaptioning] with HasImageFeatureProperties with WriteTensorflowModel + with WriteOnnxModel with HasEngine with HasRescaleFactor with HasGeneratorProperties { /** Annotator reference id. 
Used to identify elements in metadata or to refer to this annotator - * type - */ + * type + */ def this() = this(Identifiable.randomUID("VisionEncoderDecoderForImageCaptioning")) /** Output annotator type : CATEGORY - * - * @group anno - */ + * + * @group anno + */ override val outputAnnotatorType: AnnotatorType = DOCUMENT /** Input annotator type : IMAGE - * - * @group anno - */ + * + * @group anno + */ override val inputAnnotatorTypes: Array[AnnotatorType] = Array(IMAGE) /** ConfigProto from tensorflow, serialized into byte array. Get with - * config_proto.SerializeToString() - * - * @group param - */ + * config_proto.SerializeToString() + * + * @group param + */ val configProtoBytes = new IntArrayParam( this, "configProtoBytes", "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()") /** ConfigProto from tensorflow, serialized into byte array. Get with - * config_proto.SerializeToString() - * - * @group setParam - */ + * config_proto.SerializeToString() + * + * @group setParam + */ def setConfigProtoBytes(bytes: Array[Int]): this.type = set(this.configProtoBytes, bytes) /** ConfigProto from tensorflow, serialized into byte array. Get with - * config_proto.SerializeToString() - * - * @group getParam - */ + * config_proto.SerializeToString() + * + * @group getParam + */ def getConfigProtoBytes: Option[Array[Byte]] = get(this.configProtoBytes).map(_.map(_.toByte)) /** It contains TF model signatures for the laded saved model - * - * @group param - */ + * + * @group param + */ val signatures = new MapFeature[String, String](model = this, name = "signatures") /** @group setParam */ @@ -203,9 +197,9 @@ class VisionEncoderDecoderForImageCaptioning(override val uid: String) def getSignatures: Option[Map[String, String]] = get(this.signatures) /** Vocabulary used to encode the words to ids with bpeTokenizer.encode - * - * @group param - */ + * + * @group param + */ protected[nlp] val vocabulary: MapFeature[String, Int] = new MapFeature(this, "vocabulary") /** @group setParam */ @@ -215,9 +209,9 @@ class VisionEncoderDecoderForImageCaptioning(override val uid: String) protected[nlp] def getVocabulary: Map[String, Int] = $$(vocabulary) /** Holding merges.txt for BPE Tokenization - * - * @group param - */ + * + * @group param + */ protected[nlp] val merges: MapFeature[(String, String), Int] = new MapFeature(this, "merges") /** @group setParam */ @@ -236,14 +230,12 @@ class VisionEncoderDecoderForImageCaptioning(override val uid: String) private var _model: Option[Broadcast[VisionEncoderDecoder]] = None - /** @group getParam */ - def getModelIfNotSet: VisionEncoderDecoder = _model.get.value - /** @group setParam */ def setModelIfNotSet( - spark: SparkSession, - tensorflow: TensorflowWrapper, - preprocessor: Preprocessor): this.type = { + spark: SparkSession, + tensorflowWrapper: Option[TensorflowWrapper], + onnxWrapper: Option[EncoderDecoderWithoutPastWrappers], + preprocessor: Preprocessor): this.type = { if (_model.isEmpty) { val tokenizer = BpeTokenizer @@ -253,7 +245,8 @@ class VisionEncoderDecoderForImageCaptioning(override val uid: String) _model = Some( spark.sparkContext.broadcast( new VisionEncoderDecoder( - tensorflow, + tensorflowWrapper, + onnxWrapper, configProtoBytes = getConfigProtoBytes, tokenizer = tokenizer, preprocessor = preprocessor, @@ -263,6 +256,9 @@ class VisionEncoderDecoderForImageCaptioning(override val uid: String) this } + /** @group getParam */ + def getModelIfNotSet: VisionEncoderDecoder = _model.get.value + setDefault( 
batchSize -> 2, beamSize -> 1, @@ -285,16 +281,16 @@ class VisionEncoderDecoderForImageCaptioning(override val uid: String) topP -> 1.0) /** Takes a document and annotations and produces new annotations of this annotator's annotation - * type - * - * @param batchedAnnotations - * Annotations that correspond to inputAnnotationCols generated by previous annotators if any - * @return - * any number of annotations processed for every input annotation. Not necessary one to one - * relationship - */ + * type + * + * @param batchedAnnotations + * Annotations that correspond to inputAnnotationCols generated by previous annotators if any + * @return + * any number of annotations processed for every input annotation. Not necessary one to one + * relationship + */ override def batchAnnotate( - batchedAnnotations: Seq[Array[AnnotationImage]]): Seq[Seq[Annotation]] = { + batchedAnnotations: Seq[Array[AnnotationImage]]): Seq[Seq[Annotation]] = { // Zip annotations to the row it belongs to val imagesWithRow = batchedAnnotations.zipWithIndex @@ -341,19 +337,34 @@ class VisionEncoderDecoderForImageCaptioning(override val uid: String) } override def onWrite(path: String, spark: SparkSession): Unit = { - writeTensorflowModelV2( - path, - spark, - getModelIfNotSet.tensorflowWrapper, - "_image_classification", - VisionEncoderDecoderForImageCaptioning.tfFile, - configProtoBytes = getConfigProtoBytes) - } + getEngine match { + case TensorFlow.name => + writeTensorflowModelV2( + path, + spark, + getModelIfNotSet.tensorflowWrapper.get, + VisionEncoderDecoderForImageCaptioning.suffix, + VisionEncoderDecoderForImageCaptioning.tfFile, + configProtoBytes = getConfigProtoBytes) + case ONNX.name => + val wrappers = getModelIfNotSet.onnxWrappers.get + writeOnnxModels( + path, + spark, + Seq((wrappers.encoder, "encoder_model.onnx")), + VisionEncoderDecoderForImageCaptioning.suffix) + writeOnnxModels( + path, + spark, + Seq((wrappers.decoder, "decoder_model.onnx")), + VisionEncoderDecoderForImageCaptioning.suffix) + } + } } trait ReadablePretrainedVisionEncoderDecoderModel - extends ParamsAndFeaturesReadable[VisionEncoderDecoderForImageCaptioning] + extends ParamsAndFeaturesReadable[VisionEncoderDecoderForImageCaptioning] with HasPretrained[VisionEncoderDecoderForImageCaptioning] { override val defaultModelName: Some[String] = Some("image_captioning_vit_gpt2") @@ -367,23 +378,25 @@ trait ReadablePretrainedVisionEncoderDecoderModel super.pretrained(name, lang) override def pretrained( - name: String, - lang: String, - remoteLoc: String): VisionEncoderDecoderForImageCaptioning = + name: String, + lang: String, + remoteLoc: String): VisionEncoderDecoderForImageCaptioning = super.pretrained(name, lang, remoteLoc) } -trait ReadVisionEncoderDecoderDLModel extends ReadTensorflowModel { +trait ReadVisionEncoderDecoderDLModel + extends ReadTensorflowModel + with ReadOnnxModel { this: ParamsAndFeaturesReadable[VisionEncoderDecoderForImageCaptioning] => override val tfFile: String = "vision_encoder_decoder_tensorflow" + override val onnxFile: String = "vision_encoder_decoder_onnx" + val suffix = "_image_classification" + def readModel( + instance: VisionEncoderDecoderForImageCaptioning, + path: String, + spark: SparkSession): Unit = { - def readTensorflow( - instance: VisionEncoderDecoderForImageCaptioning, - path: String, - spark: SparkSession): Unit = { - - val tf = readTensorflowModel(path, spark, "_vision_encoder_decoder_tf") val preprocessor = Preprocessor( do_normalize = instance.getDoNormalize, @@ -396,26 +409,47 @@ trait 
ReadVisionEncoderDecoderDLModel extends ReadTensorflowModel { rescale_factor = instance.getRescaleFactor, size = instance.getSize) - instance.setModelIfNotSet(spark, tf, preprocessor) + instance.getEngine match { + case TensorFlow.name => + val tf = readTensorflowModel(path, spark, "_vision_encoder_decoder_tf") + instance.setModelIfNotSet(spark, Some(tf), None, preprocessor) + + case ONNX.name => + val wrappers = + readOnnxModels( + path, + spark, + Seq("encoder_model.onnx", "decoder_model.onnx"), + VisionEncoderDecoderForImageCaptioning.suffix, + dataFilePostfix = ".onnx_data") + + val onnxWrappers = EncoderDecoderWithoutPastWrappers( + wrappers("encoder_model.onnx"), + decoder = wrappers("decoder_model.onnx")) + + instance.setModelIfNotSet(spark, None, Some(onnxWrappers), preprocessor) + case _ => + throw new Exception(notSupportedEngineError) + } } - addReader(readTensorflow) + addReader(readModel) /** Loads a local SavedModel file of the model. For VisionEncoderDecoder, requires also image - * preprocessor config and vocab file. - * - * @param modelPath - * Path of the Model - * @param spark - * Spark Instance - * @return - */ + * preprocessor config and vocab file. + * + * @param modelPath + * Path of the Model + * @param spark + * Spark Instance + * @return + */ def loadSavedModel( - modelPath: String, - spark: SparkSession): VisionEncoderDecoderForImageCaptioning = { + modelPath: String, + spark: SparkSession): VisionEncoderDecoderForImageCaptioning = { implicit val formats: DefaultFormats.type = DefaultFormats // for json4s - val (localModelPath, detectedEngine) = modelSanityCheck(modelPath) + val (localModelPath, detectedEngine) = modelSanityCheck(modelPath, isEncoderDecoder = true) val vocab = { val json = loadJsonStringAsset(localModelPath, "vocab.json") @@ -490,7 +524,7 @@ trait ReadVisionEncoderDecoderDLModel extends ReadTensorflowModel { detectedEngine match { case TensorFlow.name => - val (wrapper, signatures) = + val (tfWrapper, signatures) = TensorflowWrapper.read(localModelPath, zipped = false, useBundle = true) val _signatures = signatures match { @@ -499,11 +533,37 @@ trait ReadVisionEncoderDecoderDLModel extends ReadTensorflowModel { } /** the order of setSignatures is important if we use getSignatures inside - * setModelIfNotSet - */ + * setModelIfNotSet + */ annotatorModel .setSignatures(_signatures) - .setModelIfNotSet(spark, wrapper, preprocessorConfig) + .setModelIfNotSet(spark, Some(tfWrapper), None, preprocessorConfig) + + case ONNX.name => + val onnxWrapperEncoder = + OnnxWrapper.read( + spark, + localModelPath, + zipped = false, + useBundle = true, + modelName = "encoder_model", + onnxFileSuffix = None) + + val onnxWrapperDecoder = + OnnxWrapper.read( + spark, + localModelPath, + zipped = false, + useBundle = true, + modelName = "decoder_model", + onnxFileSuffix = None) + + val onnxWrappers = EncoderDecoderWithoutPastWrappers( + onnxWrapperEncoder, + onnxWrapperDecoder) + + annotatorModel + .setModelIfNotSet(spark, None, Some(onnxWrappers), preprocessorConfig) case _ => throw new Exception(notSupportedEngineError) @@ -514,8 +574,8 @@ trait ReadVisionEncoderDecoderDLModel extends ReadTensorflowModel { } /** This is the companion object of [[VisionEncoderDecoderForImageCaptioning]]. Please refer to - * that class for the documentation. - */ + * that class for the documentation. 
+ */ object VisionEncoderDecoderForImageCaptioning - extends ReadablePretrainedVisionEncoderDecoderModel + extends ReadablePretrainedVisionEncoderDecoderModel with ReadVisionEncoderDecoderDLModel
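
The loader changes above mean `loadSavedModel` now dispatches on the detected engine and, for ONNX, wraps a separate encoder and decoder session via `EncoderDecoderWithoutPastWrappers`. The sketch below shows how an imported ONNX export could be used end to end; it is a minimal illustration, not part of this patch. Assumptions: the model was already exported to ONNX outside Spark NLP (for example with Hugging Face Optimum) into the hypothetical local directory `onnx_models/vit-gpt2` containing `encoder_model.onnx`, `decoder_model.onnx` and the vocab/merges/preprocessor assets that `loadSavedModel` reads, and an active `SparkSession` named `spark` is available. The pipeline part mirrors the scaladoc example carried in this diff.

```scala
// Minimal sketch of importing an ONNX vision encoder-decoder export (path is hypothetical).
import com.johnsnowlabs.nlp.ImageAssembler
import com.johnsnowlabs.nlp.annotators.cv.VisionEncoderDecoderForImageCaptioning
import org.apache.spark.ml.Pipeline

val exportPath = "onnx_models/vit-gpt2" // assumed local ONNX export directory

// loadSavedModel runs modelSanityCheck to detect the engine and, for ONNX,
// reads encoder_model and decoder_model into EncoderDecoderWithoutPastWrappers.
val captioning = VisionEncoderDecoderForImageCaptioning
  .loadSavedModel(exportPath, spark)
  .setBeamSize(2)
  .setDoSample(false)
  .setInputCols("image_assembler")
  .setOutputCol("caption")

// Optionally persist the imported annotator; onWrite dispatches on getEngine and
// serializes either the TensorFlow graph or the ONNX encoder/decoder files.
captioning.write.overwrite().save("spark_nlp_models/vision_encoder_decoder_onnx")

val imageAssembler = new ImageAssembler()
  .setInputCol("image")
  .setOutputCol("image_assembler")

val imageDF = spark.read
  .format("image")
  .option("dropInvalid", value = true)
  .load("src/test/resources/image/")

val pipeline = new Pipeline().setStages(Array(imageAssembler, captioning))
pipeline
  .fit(imageDF)
  .transform(imageDF)
  .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result")
  .show(truncate = false)
```

Once saved, the model can be reloaded with the regular `ReadVisionEncoderDecoderDLModel` reader introduced in this patch, which checks `instance.getEngine` and restores either the TensorFlow wrapper or the paired ONNX encoder/decoder wrappers.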